xref: /minix/minix/commands/cawf/regexp.c (revision ab712d19)
1433d6423SLionel Sambuc /*
2433d6423SLionel Sambuc  * regcomp and regexec -- regsub and regerror are elsewhere
3433d6423SLionel Sambuc  *
4433d6423SLionel Sambuc  *	Copyright (c) 1986 by University of Toronto.
5433d6423SLionel Sambuc  *	Written by Henry Spencer.  Not derived from licensed software.
6433d6423SLionel Sambuc  *
7433d6423SLionel Sambuc  *	Permission is granted to anyone to use this software for any
8433d6423SLionel Sambuc  *	purpose on any computer system, and to redistribute it freely,
9433d6423SLionel Sambuc  *	subject to the following restrictions:
10433d6423SLionel Sambuc  *
11433d6423SLionel Sambuc  *	1. The author is not responsible for the consequences of use of
12433d6423SLionel Sambuc  *		this software, no matter how awful, even if they arise
13433d6423SLionel Sambuc  *		from defects in it.
14433d6423SLionel Sambuc  *
15433d6423SLionel Sambuc  *	2. The origin of this software must not be misrepresented, either
16433d6423SLionel Sambuc  *		by explicit claim or by omission.
17433d6423SLionel Sambuc  *
18433d6423SLionel Sambuc  *	3. Altered versions must be plainly marked as such, and must not
19433d6423SLionel Sambuc  *		be misrepresented as being the original software.
20433d6423SLionel Sambuc  *
21433d6423SLionel Sambuc  * Beware that some of this code is subtly aware of the way operator
22433d6423SLionel Sambuc  * precedence is structured in regular expressions.  Serious changes in
23433d6423SLionel Sambuc  * regular-expression syntax might require a total rethink.
24433d6423SLionel Sambuc  */
25433d6423SLionel Sambuc #include <stdio.h>
26433d6423SLionel Sambuc #ifdef	UNIX
27433d6423SLionel Sambuc #ifdef	USG
28433d6423SLionel Sambuc #include <string.h>
29433d6423SLionel Sambuc #else
30433d6423SLionel Sambuc #include <strings.h>
31433d6423SLionel Sambuc #endif
32433d6423SLionel Sambuc #else
33433d6423SLionel Sambuc #include <string.h>
34433d6423SLionel Sambuc #endif
35433d6423SLionel Sambuc #include "regexp.h"
36433d6423SLionel Sambuc #include "regmagic.h"
37433d6423SLionel Sambuc #include "proto.h"
38433d6423SLionel Sambuc 
39433d6423SLionel Sambuc /*
40433d6423SLionel Sambuc  * The "internal use only" fields in regexp.h are present to pass info from
41433d6423SLionel Sambuc  * compile to execute that permits the execute phase to run lots faster on
42433d6423SLionel Sambuc  * simple cases.  They are:
43433d6423SLionel Sambuc  *
44433d6423SLionel Sambuc  * regstart	char that must begin a match; '\0' if none obvious
45433d6423SLionel Sambuc  * reganch	is the match anchored (at beginning-of-line only)?
46433d6423SLionel Sambuc  * regmust	string (pointer into program) that match must include, or NULL
47433d6423SLionel Sambuc  * regmlen	length of regmust string
48433d6423SLionel Sambuc  *
49433d6423SLionel Sambuc  * Regstart and reganch permit very fast decisions on suitable starting points
50433d6423SLionel Sambuc  * for a match, cutting down the work a lot.  Regmust permits fast rejection
51433d6423SLionel Sambuc  * of lines that cannot possibly match.  The regmust tests are costly enough
52433d6423SLionel Sambuc  * that regcomp() supplies a regmust only if the r.e. contains something
53433d6423SLionel Sambuc  * potentially expensive (at present, the only such thing detected is * or +
54433d6423SLionel Sambuc  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
55433d6423SLionel Sambuc  * supplied because the test in regexec() needs it and regcomp() is computing
56433d6423SLionel Sambuc  * it anyway.
57433d6423SLionel Sambuc  */
58433d6423SLionel Sambuc 
59433d6423SLionel Sambuc /*
60433d6423SLionel Sambuc  * Structure for regexp "program".  This is essentially a linear encoding
61433d6423SLionel Sambuc  * of a nondeterministic finite-state machine (aka syntax charts or
62433d6423SLionel Sambuc  * "railroad normal form" in parsing technology).  Each node is an opcode
63433d6423SLionel Sambuc  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
64433d6423SLionel Sambuc  * all nodes except BRANCH implement concatenation; a "next" pointer with
65433d6423SLionel Sambuc  * a BRANCH on both ends of it is connecting two alternatives.  (Here we
66433d6423SLionel Sambuc  * have one of the subtle syntax dependencies:  an individual BRANCH (as
67433d6423SLionel Sambuc  * opposed to a collection of them) is never concatenated with anything
68433d6423SLionel Sambuc  * because of operator precedence.)  The operand of some types of node is
69433d6423SLionel Sambuc  * a literal string; for others, it is a node leading into a sub-FSM.  In
70433d6423SLionel Sambuc  * particular, the operand of a BRANCH node is the first node of the branch.
71433d6423SLionel Sambuc  * (NB this is *not* a tree structure:  the tail of the branch connects
72433d6423SLionel Sambuc  * to the thing following the set of BRANCHes.)  The opcodes are:
73433d6423SLionel Sambuc  */
74433d6423SLionel Sambuc 
75433d6423SLionel Sambuc /* definition	number	opnd?	meaning */
76433d6423SLionel Sambuc #define	END	0	/* no	End of program. */
77433d6423SLionel Sambuc #define	BOL	1	/* no	Match "" at beginning of line. */
78433d6423SLionel Sambuc #define	EOL	2	/* no	Match "" at end of line. */
79433d6423SLionel Sambuc #define	ANY	3	/* no	Match any one character. */
80433d6423SLionel Sambuc #define	ANYOF	4	/* str	Match any character in this string. */
81433d6423SLionel Sambuc #define	ANYBUT	5	/* str	Match any character not in this string. */
82433d6423SLionel Sambuc #define	BRANCH	6	/* node	Match this alternative, or the next... */
83433d6423SLionel Sambuc #define	BACK	7	/* no	Match "", "next" ptr points backward. */
84433d6423SLionel Sambuc #define	EXACTLY	8	/* str	Match this string. */
85433d6423SLionel Sambuc #define	NOTHING	9	/* no	Match empty string. */
86433d6423SLionel Sambuc #define	STAR	10	/* node	Match this (simple) thing 0 or more times. */
87433d6423SLionel Sambuc #define	PLUS	11	/* node	Match this (simple) thing 1 or more times. */
88433d6423SLionel Sambuc #define	OPEN	20	/* no	Mark this point in input as start of #n. */
89433d6423SLionel Sambuc 			/*	OPEN+1 is number 1, etc. */
90433d6423SLionel Sambuc #define	CLOSE	30	/* no	Analogous to OPEN. */
91433d6423SLionel Sambuc 
92433d6423SLionel Sambuc /*
93433d6423SLionel Sambuc  * Opcode notes:
94433d6423SLionel Sambuc  *
95433d6423SLionel Sambuc  * BRANCH	The set of branches constituting a single choice are hooked
96433d6423SLionel Sambuc  *		together with their "next" pointers, since precedence prevents
97433d6423SLionel Sambuc  *		anything being concatenated to any individual branch.  The
98433d6423SLionel Sambuc  *		"next" pointer of the last BRANCH in a choice points to the
99433d6423SLionel Sambuc  *		thing following the whole choice.  This is also where the
100433d6423SLionel Sambuc  *		final "next" pointer of each individual branch points; each
101433d6423SLionel Sambuc  *		branch starts with the operand node of a BRANCH node.
102433d6423SLionel Sambuc  *
103433d6423SLionel Sambuc  * BACK		Normal "next" pointers all implicitly point forward; BACK
104433d6423SLionel Sambuc  *		exists to make loop structures possible.
105433d6423SLionel Sambuc  *
106433d6423SLionel Sambuc  * STAR,PLUS	'?', and complex '*' and '+', are implemented as circular
107433d6423SLionel Sambuc  *		BRANCH structures using BACK.  Simple cases (one character
108433d6423SLionel Sambuc  *		per match) are implemented with STAR and PLUS for speed
109433d6423SLionel Sambuc  *		and to minimize recursive plunges.
110433d6423SLionel Sambuc  *
111433d6423SLionel Sambuc  * OPEN,CLOSE	...are numbered at compile time.
112433d6423SLionel Sambuc  */
113433d6423SLionel Sambuc 
114433d6423SLionel Sambuc /*
115433d6423SLionel Sambuc  * A node is one char of opcode followed by two chars of "next" pointer.
116433d6423SLionel Sambuc  * "Next" pointers are stored as two 8-bit pieces, high order first.  The
117433d6423SLionel Sambuc  * value is a positive offset from the opcode of the node containing it.
118433d6423SLionel Sambuc  * An operand, if any, simply follows the node.  (Note that much of the
119433d6423SLionel Sambuc  * code generation knows about this implicit relationship.)
120433d6423SLionel Sambuc  *
121433d6423SLionel Sambuc  * Using two bytes for the "next" pointer is vast overkill for most things,
122433d6423SLionel Sambuc  * but allows patterns to get big without disasters.
123433d6423SLionel Sambuc  */
124433d6423SLionel Sambuc #define	OP(p)	(*(p))
125433d6423SLionel Sambuc #define	NEXT(p)	(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
126433d6423SLionel Sambuc #define	OPERAND(p)	((p) + 3)
127433d6423SLionel Sambuc 
128433d6423SLionel Sambuc /*
129433d6423SLionel Sambuc  * See regmagic.h for one further detail of program structure.
130433d6423SLionel Sambuc  */
131433d6423SLionel Sambuc 
132433d6423SLionel Sambuc 
133433d6423SLionel Sambuc /*
134433d6423SLionel Sambuc  * Utility definitions.
135433d6423SLionel Sambuc  */
136433d6423SLionel Sambuc #ifndef CHARBITS
137433d6423SLionel Sambuc #define	UCHARAT(p)	((int)*(unsigned char *)(p))
138433d6423SLionel Sambuc #else
139433d6423SLionel Sambuc #define	UCHARAT(p)	((int)*(p)&CHARBITS)
140433d6423SLionel Sambuc #endif
141433d6423SLionel Sambuc 
142433d6423SLionel Sambuc #define	FAIL(m)	{ regerror(m); return(NULL); }
143433d6423SLionel Sambuc #define	ISMULT(c)	((c) == '*' || (c) == '+' || (c) == '?')
144433d6423SLionel Sambuc #define	META	"^$.[()|?+*\\"
145433d6423SLionel Sambuc 
146433d6423SLionel Sambuc /*
147433d6423SLionel Sambuc  * Flags to be passed up and down.
148433d6423SLionel Sambuc  */
149433d6423SLionel Sambuc #define	HASWIDTH	01	/* Known never to match null string. */
150433d6423SLionel Sambuc #define	SIMPLE		02	/* Simple enough to be STAR/PLUS operand. */
151433d6423SLionel Sambuc #define	SPSTART		04	/* Starts with * or +. */
152433d6423SLionel Sambuc #define	WORST		0	/* Worst case. */
153433d6423SLionel Sambuc #define STATIC
154433d6423SLionel Sambuc /*
155433d6423SLionel Sambuc  * Global work variables for regcomp().
156433d6423SLionel Sambuc  */
157433d6423SLionel Sambuc 
158433d6423SLionel Sambuc STATIC char *regparse;		/* Input-scan pointer. */
159433d6423SLionel Sambuc STATIC int regnpar;		/* () count. */
160433d6423SLionel Sambuc STATIC unsigned char regdummy;
161433d6423SLionel Sambuc STATIC unsigned char *regcode;	/* Code-emit pointer; &regdummy = don't. */
162433d6423SLionel Sambuc STATIC long regsize;		/* Code size. */
163433d6423SLionel Sambuc 
164433d6423SLionel Sambuc /*
165433d6423SLionel Sambuc  * Forward declarations for regcomp()'s friends.
166433d6423SLionel Sambuc  */
167433d6423SLionel Sambuc STATIC unsigned char *reg(int paren, int *flagp );
168433d6423SLionel Sambuc STATIC unsigned char *regbranch(int *flagp );
169433d6423SLionel Sambuc STATIC unsigned char *regpiece(int *flagp );
170433d6423SLionel Sambuc STATIC unsigned char *regatom(int *flagp );
171433d6423SLionel Sambuc STATIC unsigned char *regnode(int op );
172433d6423SLionel Sambuc STATIC void regc(int b );
173433d6423SLionel Sambuc STATIC void reginsert(int op, unsigned char *opnd );
174433d6423SLionel Sambuc STATIC void regtail(unsigned char *p, unsigned char *val );
175433d6423SLionel Sambuc STATIC void regoptail(unsigned char *p, unsigned char *val );
176433d6423SLionel Sambuc STATIC int regtry(regexp *prog, unsigned char *string );
177433d6423SLionel Sambuc STATIC int regmatch(unsigned char *prog );
178433d6423SLionel Sambuc STATIC int regrepeat(unsigned char *p );
179433d6423SLionel Sambuc STATIC unsigned char *regnext(unsigned char *p );
180433d6423SLionel Sambuc STATIC unsigned char *regprop(unsigned char *op );
181433d6423SLionel Sambuc 
182433d6423SLionel Sambuc #ifdef STRCSPN
183433d6423SLionel Sambuc STATIC int strcspn(unsigned char *s1, unsigned char *s2 );
184433d6423SLionel Sambuc #endif
185433d6423SLionel Sambuc 
186433d6423SLionel Sambuc /*
187433d6423SLionel Sambuc  - regcomp - compile a regular expression into internal code
188433d6423SLionel Sambuc  *
189433d6423SLionel Sambuc  * We can't allocate space until we know how big the compiled form will be,
190433d6423SLionel Sambuc  * but we can't compile it (and thus know how big it is) until we've got a
191433d6423SLionel Sambuc  * place to put the code.  So we cheat:  we compile it twice, once with code
192433d6423SLionel Sambuc  * generation turned off and size counting turned on, and once "for real".
193433d6423SLionel Sambuc  * This also means that we don't allocate space until we are sure that the
194433d6423SLionel Sambuc  * thing really will compile successfully, and we never have to move the
195433d6423SLionel Sambuc  * code and thus invalidate pointers into it.  (Note that it has to be in
196433d6423SLionel Sambuc  * one piece because free() must be able to free it all.)
197433d6423SLionel Sambuc  *
198433d6423SLionel Sambuc  * Beware that the optimization-preparation code in here knows about some
199433d6423SLionel Sambuc  * of the structure of the compiled regexp.
200433d6423SLionel Sambuc  */
regcomp(char * exp)201d9494baaSJacob Adams regexp *regcomp(char *exp) {
202433d6423SLionel Sambuc 	register regexp *r;
203433d6423SLionel Sambuc 	register unsigned char *scan;
204433d6423SLionel Sambuc 	register unsigned char *longest;
205433d6423SLionel Sambuc 	register int len;
206433d6423SLionel Sambuc 	int flags;
207433d6423SLionel Sambuc 
208433d6423SLionel Sambuc 	if (exp == NULL)
209433d6423SLionel Sambuc 		FAIL("NULL argument");
210433d6423SLionel Sambuc 
211433d6423SLionel Sambuc 	/* First pass: determine size, legality. */
212433d6423SLionel Sambuc 	regparse = exp;
213433d6423SLionel Sambuc 	regnpar = 1;
214433d6423SLionel Sambuc 	regsize = 0L;
215433d6423SLionel Sambuc 	regcode = &regdummy;
216433d6423SLionel Sambuc 	regc(MAGIC);
217433d6423SLionel Sambuc 	if (reg(0, &flags) == NULL)
218433d6423SLionel Sambuc 		return(NULL);
219433d6423SLionel Sambuc 
220433d6423SLionel Sambuc 	/* Small enough for pointer-storage convention? */
221433d6423SLionel Sambuc 	if (regsize >= 32767L)		/* Probably could be 65535L. */
222433d6423SLionel Sambuc 		FAIL("regexp too big");
223433d6423SLionel Sambuc 
224433d6423SLionel Sambuc 	/* Allocate space. */
225433d6423SLionel Sambuc 	r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
226433d6423SLionel Sambuc 	if (r == NULL)
227433d6423SLionel Sambuc 		FAIL("out of space");
228433d6423SLionel Sambuc 
229433d6423SLionel Sambuc 	/* Second pass: emit code. */
230433d6423SLionel Sambuc 	regparse = exp;
231433d6423SLionel Sambuc 	regnpar = 1;
232433d6423SLionel Sambuc 	regcode = r->program;
233433d6423SLionel Sambuc 	regc(MAGIC);
234*ab712d19SDavid van Moolenbroek 	if (reg(0, &flags) == NULL) {
235*ab712d19SDavid van Moolenbroek 		free(r);
236433d6423SLionel Sambuc 		return(NULL);
237*ab712d19SDavid van Moolenbroek 	}
238433d6423SLionel Sambuc 
239433d6423SLionel Sambuc 	/* Dig out information for optimizations. */
240433d6423SLionel Sambuc 	r->regstart = '\0';	/* Worst-case defaults. */
241433d6423SLionel Sambuc 	r->reganch = 0;
242433d6423SLionel Sambuc 	r->regmust = NULL;
243433d6423SLionel Sambuc 	r->regmlen = 0;
244433d6423SLionel Sambuc 	scan = r->program+1;			/* First BRANCH. */
245433d6423SLionel Sambuc 	if (OP(regnext(scan)) == END) {		/* Only one top-level choice. */
246433d6423SLionel Sambuc 		scan = OPERAND(scan);
247433d6423SLionel Sambuc 
248433d6423SLionel Sambuc 		/* Starting-point info. */
249433d6423SLionel Sambuc 		if (OP(scan) == EXACTLY)
250433d6423SLionel Sambuc 			r->regstart = *OPERAND(scan);
251433d6423SLionel Sambuc 		else if (OP(scan) == BOL)
252433d6423SLionel Sambuc 			r->reganch++;
253433d6423SLionel Sambuc 
254433d6423SLionel Sambuc 		/*
255433d6423SLionel Sambuc 		 * If there's something expensive in the r.e., find the
256433d6423SLionel Sambuc 		 * longest literal string that must appear and make it the
257433d6423SLionel Sambuc 		 * regmust.  Resolve ties in favor of later strings, since
258433d6423SLionel Sambuc 		 * the regstart check works with the beginning of the r.e.
259433d6423SLionel Sambuc 		 * and avoiding duplication strengthens checking.  Not a
260433d6423SLionel Sambuc 		 * strong reason, but sufficient in the absence of others.
261433d6423SLionel Sambuc 		 */
262433d6423SLionel Sambuc 		if (flags&SPSTART) {
263433d6423SLionel Sambuc 			longest = NULL;
264433d6423SLionel Sambuc 			len = 0;
265433d6423SLionel Sambuc 			for (; scan != NULL; scan = regnext(scan))
266433d6423SLionel Sambuc 				if (OP(scan) == EXACTLY
267433d6423SLionel Sambuc 				&& strlen((char *)OPERAND(scan)) >= len) {
268433d6423SLionel Sambuc 					longest = OPERAND(scan);
269433d6423SLionel Sambuc 					len = strlen((char *)OPERAND(scan));
270433d6423SLionel Sambuc 				}
271433d6423SLionel Sambuc 			r->regmust = longest;
272433d6423SLionel Sambuc 			r->regmlen = len;
273433d6423SLionel Sambuc 		}
274433d6423SLionel Sambuc 	}
275433d6423SLionel Sambuc 
276433d6423SLionel Sambuc 	return(r);
277433d6423SLionel Sambuc }
278433d6423SLionel Sambuc 
279433d6423SLionel Sambuc /*
280433d6423SLionel Sambuc  - reg - regular expression, i.e. main body or parenthesized thing
281433d6423SLionel Sambuc  *
282433d6423SLionel Sambuc  * Caller must absorb opening parenthesis.
283433d6423SLionel Sambuc  *
284433d6423SLionel Sambuc  * Combining parenthesis handling with the base level of regular expression
285433d6423SLionel Sambuc  * is a trifle forced, but the need to tie the tails of the branches to what
286433d6423SLionel Sambuc  * follows makes it hard to avoid.
287433d6423SLionel Sambuc  */
reg(int paren,int * flagp)288d9494baaSJacob Adams STATIC unsigned char *reg(int paren, int *flagp) {
289d9494baaSJacob Adams /* Parenthesized? paren */
290433d6423SLionel Sambuc 	register unsigned char *ret;
291433d6423SLionel Sambuc 	register unsigned char *br;
292433d6423SLionel Sambuc 	register unsigned char *ender;
293433d6423SLionel Sambuc 	register int parno;
294433d6423SLionel Sambuc 	int flags;
295433d6423SLionel Sambuc 
296433d6423SLionel Sambuc 	*flagp = HASWIDTH;	/* Tentatively. */
297433d6423SLionel Sambuc 
298433d6423SLionel Sambuc 	/* Make an OPEN node, if parenthesized. */
299433d6423SLionel Sambuc 	if (paren) {
300433d6423SLionel Sambuc 		if (regnpar >= NSUBEXP)
301433d6423SLionel Sambuc 			FAIL("too many ()");
302433d6423SLionel Sambuc 		parno = regnpar;
303433d6423SLionel Sambuc 		regnpar++;
304433d6423SLionel Sambuc 		ret = regnode(OPEN+parno);
305433d6423SLionel Sambuc 	} else
306433d6423SLionel Sambuc 		ret = NULL;
307433d6423SLionel Sambuc 
308433d6423SLionel Sambuc 	/* Pick up the branches, linking them together. */
309433d6423SLionel Sambuc 	br = regbranch(&flags);
310433d6423SLionel Sambuc 	if (br == NULL)
311433d6423SLionel Sambuc 		return(NULL);
312433d6423SLionel Sambuc 	if (ret != NULL)
313433d6423SLionel Sambuc 		regtail(ret, br);	/* OPEN -> first. */
314433d6423SLionel Sambuc 	else
315433d6423SLionel Sambuc 		ret = br;
316433d6423SLionel Sambuc 	if (!(flags&HASWIDTH))
317433d6423SLionel Sambuc 		*flagp &= ~HASWIDTH;
318433d6423SLionel Sambuc 	*flagp |= flags&SPSTART;
319433d6423SLionel Sambuc 	while (*regparse == '|') {
320433d6423SLionel Sambuc 		regparse++;
321433d6423SLionel Sambuc 		br = regbranch(&flags);
322433d6423SLionel Sambuc 		if (br == NULL)
323433d6423SLionel Sambuc 			return(NULL);
324433d6423SLionel Sambuc 		regtail(ret, br);	/* BRANCH -> BRANCH. */
325433d6423SLionel Sambuc 		if (!(flags&HASWIDTH))
326433d6423SLionel Sambuc 			*flagp &= ~HASWIDTH;
327433d6423SLionel Sambuc 		*flagp |= flags&SPSTART;
328433d6423SLionel Sambuc 	}
329433d6423SLionel Sambuc 
330433d6423SLionel Sambuc 	/* Make a closing node, and hook it on the end. */
331433d6423SLionel Sambuc 	ender = regnode((paren) ? CLOSE+parno : END);
332433d6423SLionel Sambuc 	regtail(ret, ender);
333433d6423SLionel Sambuc 
334433d6423SLionel Sambuc 	/* Hook the tails of the branches to the closing node. */
335433d6423SLionel Sambuc 	for (br = ret; br != NULL; br = regnext(br))
336433d6423SLionel Sambuc 		regoptail(br, ender);
337433d6423SLionel Sambuc 
338433d6423SLionel Sambuc 	/* Check for proper termination. */
339433d6423SLionel Sambuc 	if (paren && *regparse++ != ')') {
340433d6423SLionel Sambuc 		FAIL("unmatched ()");
341433d6423SLionel Sambuc 	} else if (!paren && *regparse != '\0') {
342433d6423SLionel Sambuc 		if (*regparse == ')') {
343433d6423SLionel Sambuc 			FAIL("unmatched ()");
344433d6423SLionel Sambuc 		} else
345433d6423SLionel Sambuc 			FAIL("junk on end");	/* "Can't happen". */
346433d6423SLionel Sambuc 		/* NOTREACHED */
347433d6423SLionel Sambuc 	}
348433d6423SLionel Sambuc 
349433d6423SLionel Sambuc 	return(ret);
350433d6423SLionel Sambuc }
351433d6423SLionel Sambuc 
352433d6423SLionel Sambuc /*
353433d6423SLionel Sambuc  - regbranch - one alternative of an | operator
354433d6423SLionel Sambuc  *
355433d6423SLionel Sambuc  * Implements the concatenation operator.
356433d6423SLionel Sambuc  */
regbranch(int * flagp)357d9494baaSJacob Adams STATIC unsigned char *regbranch(int *flagp) {
358433d6423SLionel Sambuc 	register unsigned char *ret;
359433d6423SLionel Sambuc 	register unsigned char *chain;
360433d6423SLionel Sambuc 	register unsigned char *latest;
361433d6423SLionel Sambuc 	int flags;
362433d6423SLionel Sambuc 
363433d6423SLionel Sambuc 	*flagp = WORST;		/* Tentatively. */
364433d6423SLionel Sambuc 
365433d6423SLionel Sambuc 	ret = regnode(BRANCH);
366433d6423SLionel Sambuc 	chain = NULL;
367433d6423SLionel Sambuc 	while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
368433d6423SLionel Sambuc 		latest = regpiece(&flags);
369433d6423SLionel Sambuc 		if (latest == NULL)
370433d6423SLionel Sambuc 			return(NULL);
371433d6423SLionel Sambuc 		*flagp |= flags&HASWIDTH;
372433d6423SLionel Sambuc 		if (chain == NULL)	/* First piece. */
373433d6423SLionel Sambuc 			*flagp |= flags&SPSTART;
374433d6423SLionel Sambuc 		else
375433d6423SLionel Sambuc 			regtail(chain, latest);
376433d6423SLionel Sambuc 		chain = latest;
377433d6423SLionel Sambuc 	}
378433d6423SLionel Sambuc 	if (chain == NULL)	/* Loop ran zero times. */
379433d6423SLionel Sambuc 		(void) regnode(NOTHING);
380433d6423SLionel Sambuc 
381433d6423SLionel Sambuc 	return(ret);
382433d6423SLionel Sambuc }
383433d6423SLionel Sambuc 
384433d6423SLionel Sambuc /*
385433d6423SLionel Sambuc  - regpiece - something followed by possible [*+?]
386433d6423SLionel Sambuc  *
387433d6423SLionel Sambuc  * Note that the branching code sequences used for ? and the general cases
388433d6423SLionel Sambuc  * of * and + are somewhat optimized:  they use the same NOTHING node as
389433d6423SLionel Sambuc  * both the endmarker for their branch list and the body of the last branch.
390433d6423SLionel Sambuc  * It might seem that this node could be dispensed with entirely, but the
391433d6423SLionel Sambuc  * endmarker role is not redundant.
392433d6423SLionel Sambuc  */
regpiece(int * flagp)393d9494baaSJacob Adams STATIC unsigned char *regpiece(int *flagp) {
394433d6423SLionel Sambuc 	register unsigned char *ret;
395433d6423SLionel Sambuc 	register unsigned char op;
396433d6423SLionel Sambuc 	register unsigned char *next;
397433d6423SLionel Sambuc 	int flags;
398433d6423SLionel Sambuc 
399433d6423SLionel Sambuc 	ret = regatom(&flags);
400433d6423SLionel Sambuc 	if (ret == NULL)
401433d6423SLionel Sambuc 		return(NULL);
402433d6423SLionel Sambuc 
403433d6423SLionel Sambuc 	op = *regparse;
404433d6423SLionel Sambuc 	if (!ISMULT(op)) {
405433d6423SLionel Sambuc 		*flagp = flags;
406433d6423SLionel Sambuc 		return(ret);
407433d6423SLionel Sambuc 	}
408433d6423SLionel Sambuc 
409433d6423SLionel Sambuc 	if (!(flags&HASWIDTH) && op != '?')
410433d6423SLionel Sambuc 		FAIL("*+ operand could be empty");
411433d6423SLionel Sambuc 	*flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
412433d6423SLionel Sambuc 
413433d6423SLionel Sambuc 	if (op == '*' && (flags&SIMPLE))
414433d6423SLionel Sambuc 		reginsert(STAR, ret);
415433d6423SLionel Sambuc 	else if (op == '*') {
416433d6423SLionel Sambuc 		/* Emit x* as (x&|), where & means "self". */
417433d6423SLionel Sambuc 		reginsert(BRANCH, ret);			/* Either x */
418433d6423SLionel Sambuc 		regoptail(ret, regnode(BACK));		/* and loop */
419433d6423SLionel Sambuc 		regoptail(ret, ret);			/* back */
420433d6423SLionel Sambuc 		regtail(ret, regnode(BRANCH));		/* or */
421433d6423SLionel Sambuc 		regtail(ret, regnode(NOTHING));		/* null. */
422433d6423SLionel Sambuc 	} else if (op == '+' && (flags&SIMPLE))
423433d6423SLionel Sambuc 		reginsert(PLUS, ret);
424433d6423SLionel Sambuc 	else if (op == '+') {
425433d6423SLionel Sambuc 		/* Emit x+ as x(&|), where & means "self". */
426433d6423SLionel Sambuc 		next = regnode(BRANCH);			/* Either */
427433d6423SLionel Sambuc 		regtail(ret, next);
428433d6423SLionel Sambuc 		regtail(regnode(BACK), ret);		/* loop back */
429433d6423SLionel Sambuc 		regtail(next, regnode(BRANCH));		/* or */
430433d6423SLionel Sambuc 		regtail(ret, regnode(NOTHING));		/* null. */
431433d6423SLionel Sambuc 	} else if (op == '?') {
432433d6423SLionel Sambuc 		/* Emit x? as (x|) */
433433d6423SLionel Sambuc 		reginsert(BRANCH, ret);			/* Either x */
434433d6423SLionel Sambuc 		regtail(ret, regnode(BRANCH));		/* or */
435433d6423SLionel Sambuc 		next = regnode(NOTHING);		/* null. */
436433d6423SLionel Sambuc 		regtail(ret, next);
437433d6423SLionel Sambuc 		regoptail(ret, next);
438433d6423SLionel Sambuc 	}
439433d6423SLionel Sambuc 	regparse++;
440433d6423SLionel Sambuc 	if (ISMULT(*regparse))
441433d6423SLionel Sambuc 		FAIL("nested *?+");
442433d6423SLionel Sambuc 
443433d6423SLionel Sambuc 	return(ret);
444433d6423SLionel Sambuc }
445433d6423SLionel Sambuc 
446433d6423SLionel Sambuc /*
447433d6423SLionel Sambuc  - regatom - the lowest level
448433d6423SLionel Sambuc  *
449433d6423SLionel Sambuc  * Optimization:  gobbles an entire sequence of ordinary characters so that
450433d6423SLionel Sambuc  * it can turn them into a single node, which is smaller to store and
451433d6423SLionel Sambuc  * faster to run.  Backslashed characters are exceptions, each becoming a
452433d6423SLionel Sambuc  * separate node; the code is simpler that way and it's not worth fixing.
453433d6423SLionel Sambuc  */
regatom(int * flagp)454d9494baaSJacob Adams STATIC unsigned char *regatom(int *flagp) {
455433d6423SLionel Sambuc 	register unsigned char *ret;
456433d6423SLionel Sambuc 	int flags;
457433d6423SLionel Sambuc 
458433d6423SLionel Sambuc 	*flagp = WORST;		/* Tentatively. */
459433d6423SLionel Sambuc 
460433d6423SLionel Sambuc 	switch (*regparse++) {
461433d6423SLionel Sambuc 	case '^':
462433d6423SLionel Sambuc 		ret = regnode(BOL);
463433d6423SLionel Sambuc 		break;
464433d6423SLionel Sambuc 	case '$':
465433d6423SLionel Sambuc 		ret = regnode(EOL);
466433d6423SLionel Sambuc 		break;
467433d6423SLionel Sambuc 	case '.':
468433d6423SLionel Sambuc 		ret = regnode(ANY);
469433d6423SLionel Sambuc 		*flagp |= HASWIDTH|SIMPLE;
470433d6423SLionel Sambuc 		break;
471433d6423SLionel Sambuc 	case '[': {
472433d6423SLionel Sambuc 			register int class;
473433d6423SLionel Sambuc 			register int classend;
474433d6423SLionel Sambuc 
475433d6423SLionel Sambuc 			if (*regparse == '^') {	/* Complement of range. */
476433d6423SLionel Sambuc 				ret = regnode(ANYBUT);
477433d6423SLionel Sambuc 				regparse++;
478433d6423SLionel Sambuc 			} else
479433d6423SLionel Sambuc 				ret = regnode(ANYOF);
480433d6423SLionel Sambuc 			if (*regparse == ']' || *regparse == '-')
481433d6423SLionel Sambuc 				regc(*regparse++);
482433d6423SLionel Sambuc 			while (*regparse != '\0' && *regparse != ']') {
483433d6423SLionel Sambuc 				if (*regparse == '-') {
484433d6423SLionel Sambuc 					regparse++;
485433d6423SLionel Sambuc 					if (*regparse == ']' || *regparse == '\0')
486433d6423SLionel Sambuc 						regc('-');
487433d6423SLionel Sambuc 					else {
488433d6423SLionel Sambuc 						class = UCHARAT(regparse-2)+1;
489433d6423SLionel Sambuc 						classend = UCHARAT(regparse);
490433d6423SLionel Sambuc 						if (class > classend+1)
491433d6423SLionel Sambuc 							FAIL("invalid [] range");
492433d6423SLionel Sambuc 						for (; class <= classend; class++)
493433d6423SLionel Sambuc 							regc(class);
494433d6423SLionel Sambuc 						regparse++;
495433d6423SLionel Sambuc 					}
496433d6423SLionel Sambuc 				} else
497433d6423SLionel Sambuc 					regc(*regparse++);
498433d6423SLionel Sambuc 			}
499433d6423SLionel Sambuc 			regc('\0');
500433d6423SLionel Sambuc 			if (*regparse != ']')
501433d6423SLionel Sambuc 				FAIL("unmatched []");
502433d6423SLionel Sambuc 			regparse++;
503433d6423SLionel Sambuc 			*flagp |= HASWIDTH|SIMPLE;
504433d6423SLionel Sambuc 		}
505433d6423SLionel Sambuc 		break;
506433d6423SLionel Sambuc 	case '(':
507433d6423SLionel Sambuc 		ret = reg(1, &flags);
508433d6423SLionel Sambuc 		if (ret == NULL)
509433d6423SLionel Sambuc 			return(NULL);
510433d6423SLionel Sambuc 		*flagp |= flags&(HASWIDTH|SPSTART);
511433d6423SLionel Sambuc 		break;
512433d6423SLionel Sambuc 	case '\0':
513433d6423SLionel Sambuc 	case '|':
514433d6423SLionel Sambuc 	case ')':
515433d6423SLionel Sambuc 		FAIL("internal urp");	/* Supposed to be caught earlier. */
516433d6423SLionel Sambuc 		break;
517433d6423SLionel Sambuc 	case '?':
518433d6423SLionel Sambuc 	case '+':
519433d6423SLionel Sambuc 	case '*':
520433d6423SLionel Sambuc 		FAIL("?+* follows nothing");
521433d6423SLionel Sambuc 		break;
522433d6423SLionel Sambuc 	case '\\':
523433d6423SLionel Sambuc 		if (*regparse == '\0')
524433d6423SLionel Sambuc 			FAIL("trailing \\");
525433d6423SLionel Sambuc 		ret = regnode(EXACTLY);
526433d6423SLionel Sambuc 		regc(*regparse++);
527433d6423SLionel Sambuc 		regc('\0');
528433d6423SLionel Sambuc 		*flagp |= HASWIDTH|SIMPLE;
529433d6423SLionel Sambuc 		break;
530433d6423SLionel Sambuc 	default: {
531433d6423SLionel Sambuc 			register int len;
532433d6423SLionel Sambuc 			register unsigned char ender;
533433d6423SLionel Sambuc 
534433d6423SLionel Sambuc 			regparse--;
535433d6423SLionel Sambuc #ifdef	STRCSPN
536433d6423SLionel Sambuc 			len = strcspn(regparse, (unsigned char *)META);
537433d6423SLionel Sambuc #else
538433d6423SLionel Sambuc 			len = strcspn((char *)regparse, META);
539433d6423SLionel Sambuc #endif
540433d6423SLionel Sambuc 			if (len <= 0)
541433d6423SLionel Sambuc 				FAIL("internal disaster");
542433d6423SLionel Sambuc 			ender = *(regparse+len);
543433d6423SLionel Sambuc 			if (len > 1 && ISMULT(ender))
544433d6423SLionel Sambuc 				len--;		/* Back off clear of ?+* operand. */
545433d6423SLionel Sambuc 			*flagp |= HASWIDTH;
546433d6423SLionel Sambuc 			if (len == 1)
547433d6423SLionel Sambuc 				*flagp |= SIMPLE;
548433d6423SLionel Sambuc 			ret = regnode(EXACTLY);
549433d6423SLionel Sambuc 			while (len > 0) {
550433d6423SLionel Sambuc 				regc(*regparse++);
551433d6423SLionel Sambuc 				len--;
552433d6423SLionel Sambuc 			}
553433d6423SLionel Sambuc 			regc('\0');
554433d6423SLionel Sambuc 		}
555433d6423SLionel Sambuc 		break;
556433d6423SLionel Sambuc 	}
557433d6423SLionel Sambuc 
558433d6423SLionel Sambuc 	return(ret);
559433d6423SLionel Sambuc }
560433d6423SLionel Sambuc 
561433d6423SLionel Sambuc /*
562433d6423SLionel Sambuc  - regnode - emit a node
563433d6423SLionel Sambuc  */
regnode(int iop)564d9494baaSJacob Adams STATIC unsigned char *regnode(int iop) {
565d9494baaSJacob Adams /* Return value is location */
566433d6423SLionel Sambuc 	register unsigned char *ret;
567433d6423SLionel Sambuc 	register unsigned char *ptr;
568433d6423SLionel Sambuc 	unsigned char op;
569433d6423SLionel Sambuc 
570433d6423SLionel Sambuc 	op = (unsigned char) iop;
571433d6423SLionel Sambuc 	ret = regcode;
572433d6423SLionel Sambuc 	if (ret == &regdummy) {
573433d6423SLionel Sambuc 		regsize += 3;
574433d6423SLionel Sambuc 		return(ret);
575433d6423SLionel Sambuc 	}
576433d6423SLionel Sambuc 
577433d6423SLionel Sambuc 	ptr = ret;
578433d6423SLionel Sambuc 	*ptr++ = op;
579433d6423SLionel Sambuc 	*ptr++ = '\0';		/* Null "next" pointer. */
580433d6423SLionel Sambuc 	*ptr++ = '\0';
581433d6423SLionel Sambuc 	regcode = ptr;
582433d6423SLionel Sambuc 
583433d6423SLionel Sambuc 	return(ret);
584433d6423SLionel Sambuc }
585433d6423SLionel Sambuc 
586433d6423SLionel Sambuc /*
587433d6423SLionel Sambuc  - regc - emit (if appropriate) a byte of code
588433d6423SLionel Sambuc  */
regc(int ib)589d9494baaSJacob Adams STATIC void regc(int ib) {
590433d6423SLionel Sambuc 	unsigned char b;
591433d6423SLionel Sambuc 
592433d6423SLionel Sambuc 	b = (unsigned char) ib;
593433d6423SLionel Sambuc 	if (regcode != &regdummy)
594433d6423SLionel Sambuc 		*regcode++ = b;
595433d6423SLionel Sambuc 	else
596433d6423SLionel Sambuc 		regsize++;
597433d6423SLionel Sambuc }
598433d6423SLionel Sambuc 
599433d6423SLionel Sambuc /*
600433d6423SLionel Sambuc  - reginsert - insert an operator in front of already-emitted operand
601433d6423SLionel Sambuc  *
602433d6423SLionel Sambuc  * Means relocating the operand.
603433d6423SLionel Sambuc  */
reginsert(int iop,unsigned char * opnd)604d9494baaSJacob Adams STATIC void reginsert(int iop, unsigned char *opnd) {
605433d6423SLionel Sambuc 	register unsigned char *src;
606433d6423SLionel Sambuc 	register unsigned char *dst;
607433d6423SLionel Sambuc 	register unsigned char *place;
608433d6423SLionel Sambuc 	unsigned char op;
609433d6423SLionel Sambuc 
610433d6423SLionel Sambuc 	op = (unsigned char) iop;
611433d6423SLionel Sambuc 	if (regcode == &regdummy) {
612433d6423SLionel Sambuc 		regsize += 3;
613433d6423SLionel Sambuc 		return;
614433d6423SLionel Sambuc 	}
615433d6423SLionel Sambuc 
616433d6423SLionel Sambuc 	src = regcode;
617433d6423SLionel Sambuc 	regcode += 3;
618433d6423SLionel Sambuc 	dst = regcode;
619433d6423SLionel Sambuc 	while (src > opnd)
620433d6423SLionel Sambuc 		*--dst = *--src;
621433d6423SLionel Sambuc 
622433d6423SLionel Sambuc 	place = opnd;		/* Op node, where operand used to be. */
623433d6423SLionel Sambuc 	*place++ = op;
624433d6423SLionel Sambuc 	*place++ = '\0';
625433d6423SLionel Sambuc 	*place++ = '\0';
626433d6423SLionel Sambuc }
627433d6423SLionel Sambuc 
628433d6423SLionel Sambuc /*
629433d6423SLionel Sambuc  - regtail - set the next-pointer at the end of a node chain
630433d6423SLionel Sambuc  */
regtail(unsigned char * p,unsigned char * val)631d9494baaSJacob Adams STATIC void regtail(unsigned char *p, unsigned char *val) {
632433d6423SLionel Sambuc 	register unsigned char *scan;
633433d6423SLionel Sambuc 	register unsigned char *temp;
634433d6423SLionel Sambuc 	register int offset;
635433d6423SLionel Sambuc 
636433d6423SLionel Sambuc 	if (p == &regdummy)
637433d6423SLionel Sambuc 		return;
638433d6423SLionel Sambuc 
639433d6423SLionel Sambuc 	/* Find last node. */
640433d6423SLionel Sambuc 	scan = p;
641433d6423SLionel Sambuc 	for (;;) {
642433d6423SLionel Sambuc 		temp = regnext(scan);
643433d6423SLionel Sambuc 		if (temp == NULL)
644433d6423SLionel Sambuc 			break;
645433d6423SLionel Sambuc 		scan = temp;
646433d6423SLionel Sambuc 	}
647433d6423SLionel Sambuc 
648433d6423SLionel Sambuc 	if (OP(scan) == BACK)
649433d6423SLionel Sambuc 		offset = scan - val;
650433d6423SLionel Sambuc 	else
651433d6423SLionel Sambuc 		offset = val - scan;
652433d6423SLionel Sambuc 	*(scan+1) = (offset>>8)&0377;
653433d6423SLionel Sambuc 	*(scan+2) = offset&0377;
654433d6423SLionel Sambuc }
655433d6423SLionel Sambuc 
656433d6423SLionel Sambuc /*
657433d6423SLionel Sambuc  - regoptail - regtail on operand of first argument; nop if operandless
658433d6423SLionel Sambuc  */
regoptail(unsigned char * p,unsigned char * val)659d9494baaSJacob Adams STATIC void regoptail(unsigned char *p, unsigned char *val) {
660433d6423SLionel Sambuc 	/* "Operandless" and "op != BRANCH" are synonymous in practice. */
661433d6423SLionel Sambuc 	if (p == NULL || p == &regdummy || OP(p) != BRANCH)
662433d6423SLionel Sambuc 		return;
663433d6423SLionel Sambuc 	regtail(OPERAND(p), val);
664433d6423SLionel Sambuc }
665433d6423SLionel Sambuc 
666433d6423SLionel Sambuc /*
667433d6423SLionel Sambuc  * regexec and friends
668433d6423SLionel Sambuc  */
669433d6423SLionel Sambuc 
670433d6423SLionel Sambuc /*
671433d6423SLionel Sambuc  * Global work variables for regexec().
672433d6423SLionel Sambuc  */
673433d6423SLionel Sambuc STATIC unsigned char *reginput;		/* String-input pointer. */
674433d6423SLionel Sambuc STATIC unsigned char *regbol;		/* Beginning of input, for ^ check. */
675433d6423SLionel Sambuc STATIC unsigned char **regstartp;	/* Pointer to startp array. */
676433d6423SLionel Sambuc STATIC unsigned char **regendp;		/* Ditto for endp. */
677433d6423SLionel Sambuc 
678433d6423SLionel Sambuc #ifdef DEBUG
679433d6423SLionel Sambuc int regnarrate = 0;
680d9494baaSJacob Adams void regdump(void);
681d9494baaSJacob Adams STATIC unsigned char *regprop(void);
682433d6423SLionel Sambuc #endif
683433d6423SLionel Sambuc 
684433d6423SLionel Sambuc /*
685433d6423SLionel Sambuc  - regexec - match a regexp against a string
686433d6423SLionel Sambuc  */
687433d6423SLionel Sambuc int
regexec(register regexp * prog,register unsigned char * string)688d9494baaSJacob Adams regexec(register regexp *prog, register unsigned char *string) {
689433d6423SLionel Sambuc 	register unsigned char *s;
690433d6423SLionel Sambuc #ifndef	STDLIB
691433d6423SLionel Sambuc 	extern char *strchr();
692433d6423SLionel Sambuc #endif
693433d6423SLionel Sambuc 
694433d6423SLionel Sambuc 	/* Be paranoid... */
695433d6423SLionel Sambuc 	if (prog == NULL || string == NULL) {
696433d6423SLionel Sambuc 		regerror("NULL parameter");
697433d6423SLionel Sambuc 		return(0);
698433d6423SLionel Sambuc 	}
699433d6423SLionel Sambuc 
700433d6423SLionel Sambuc 	/* Check validity of program. */
701433d6423SLionel Sambuc 	if (UCHARAT(prog->program) != MAGIC) {
702433d6423SLionel Sambuc 		regerror("corrupted program");
703433d6423SLionel Sambuc 		return(0);
704433d6423SLionel Sambuc 	}
705433d6423SLionel Sambuc 
706433d6423SLionel Sambuc 	/* If there is a "must appear" string, look for it. */
707433d6423SLionel Sambuc 	if (prog->regmust != NULL) {
708433d6423SLionel Sambuc 		s = string;
709433d6423SLionel Sambuc 		while ((s = (unsigned char *)strchr((char *)s,prog->regmust[0]))
710433d6423SLionel Sambuc 		!= NULL) {
711433d6423SLionel Sambuc 			if (strncmp((char *)s, (char *)prog->regmust,
712433d6423SLionel Sambuc 			    prog->regmlen)
713433d6423SLionel Sambuc 			== 0)
714433d6423SLionel Sambuc 				break;	/* Found it. */
715433d6423SLionel Sambuc 			s++;
716433d6423SLionel Sambuc 		}
717433d6423SLionel Sambuc 		if (s == NULL)	/* Not present. */
718433d6423SLionel Sambuc 			return(0);
719433d6423SLionel Sambuc 	}
720433d6423SLionel Sambuc 
721433d6423SLionel Sambuc 	/* Mark beginning of line for ^ . */
722433d6423SLionel Sambuc 	regbol = string;
723433d6423SLionel Sambuc 
724433d6423SLionel Sambuc 	/* Simplest case:  anchored match need be tried only once. */
725433d6423SLionel Sambuc 	if (prog->reganch)
726433d6423SLionel Sambuc 		return(regtry(prog, string));
727433d6423SLionel Sambuc 
728433d6423SLionel Sambuc 	/* Messy cases:  unanchored match. */
729433d6423SLionel Sambuc 	s = string;
730433d6423SLionel Sambuc 	if (prog->regstart != '\0')
731433d6423SLionel Sambuc 		/* We know what char it must start with. */
732433d6423SLionel Sambuc 		while ((s = (unsigned char *)strchr((char *)s, prog->regstart))
733433d6423SLionel Sambuc 		!= NULL) {
734433d6423SLionel Sambuc 			if (regtry(prog, s))
735433d6423SLionel Sambuc 				return(1);
736433d6423SLionel Sambuc 			s++;
737433d6423SLionel Sambuc 		}
738433d6423SLionel Sambuc 	else
739433d6423SLionel Sambuc 		/* We don't -- general case. */
740433d6423SLionel Sambuc 		do {
741433d6423SLionel Sambuc 			if (regtry(prog, s))
742433d6423SLionel Sambuc 				return(1);
743433d6423SLionel Sambuc 		} while (*s++ != '\0');
744433d6423SLionel Sambuc 
745433d6423SLionel Sambuc 	/* Failure. */
746433d6423SLionel Sambuc 	return(0);
747433d6423SLionel Sambuc }
748433d6423SLionel Sambuc 
749433d6423SLionel Sambuc /*
750433d6423SLionel Sambuc  - regtry - try match at specific point
751433d6423SLionel Sambuc  */
regtry(regexp * prog,unsigned char * string)752d9494baaSJacob Adams STATIC int regtry(regexp *prog, unsigned char *string) {
753d9494baaSJacob Adams /* Return value 0 failure, 1 success */
754433d6423SLionel Sambuc 	register int i;
755433d6423SLionel Sambuc 	register unsigned char **sp;
756433d6423SLionel Sambuc 	register unsigned char **ep;
757433d6423SLionel Sambuc 
758433d6423SLionel Sambuc 	reginput = string;
759433d6423SLionel Sambuc 	regstartp = prog->startp;
760433d6423SLionel Sambuc 	regendp = prog->endp;
761433d6423SLionel Sambuc 
762433d6423SLionel Sambuc 	sp = prog->startp;
763433d6423SLionel Sambuc 	ep = prog->endp;
764433d6423SLionel Sambuc 	for (i = NSUBEXP; i > 0; i--) {
765433d6423SLionel Sambuc 		*sp++ = NULL;
766433d6423SLionel Sambuc 		*ep++ = NULL;
767433d6423SLionel Sambuc 	}
768433d6423SLionel Sambuc 	if (regmatch(prog->program + 1)) {
769433d6423SLionel Sambuc 		prog->startp[0] = string;
770433d6423SLionel Sambuc 		prog->endp[0] = reginput;
771433d6423SLionel Sambuc 		return(1);
772433d6423SLionel Sambuc 	} else
773433d6423SLionel Sambuc 		return(0);
774433d6423SLionel Sambuc }
775433d6423SLionel Sambuc 
776433d6423SLionel Sambuc /*
777433d6423SLionel Sambuc  - regmatch - main matching routine
778433d6423SLionel Sambuc  *
779433d6423SLionel Sambuc  * Conceptually the strategy is simple:  check to see whether the current
780433d6423SLionel Sambuc  * node matches, call self recursively to see whether the rest matches,
781433d6423SLionel Sambuc  * and then act accordingly.  In practice we make some effort to avoid
782433d6423SLionel Sambuc  * recursion, in particular by going through "ordinary" nodes (that don't
783433d6423SLionel Sambuc  * need to know whether the rest of the match failed) by a loop instead of
784433d6423SLionel Sambuc  * by recursion.
785433d6423SLionel Sambuc  */
regmatch(unsigned char * prog)786d9494baaSJacob Adams STATIC int regmatch( unsigned char *prog) {
787d9494baaSJacob Adams /* Return value 0 failure 1 success */
788433d6423SLionel Sambuc 	register unsigned char *scan;	/* Current node. */
789433d6423SLionel Sambuc 	unsigned char *next;		/* Next node. */
790433d6423SLionel Sambuc #ifndef	STDLIB
791433d6423SLionel Sambuc 	extern char *strchr();
792433d6423SLionel Sambuc #endif
793433d6423SLionel Sambuc 
794433d6423SLionel Sambuc 	scan = prog;
795433d6423SLionel Sambuc #ifdef DEBUG
796433d6423SLionel Sambuc 	if (scan != NULL && regnarrate)
797433d6423SLionel Sambuc 		fprintf(stderr, "%s(\n", regprop(scan));
798433d6423SLionel Sambuc #endif
799433d6423SLionel Sambuc 	while (scan != NULL) {
800433d6423SLionel Sambuc #ifdef DEBUG
801433d6423SLionel Sambuc 		if (regnarrate)
802433d6423SLionel Sambuc 			fprintf(stderr, "%s...\n", regprop(scan));
803433d6423SLionel Sambuc #endif
804433d6423SLionel Sambuc 		next = regnext(scan);
805433d6423SLionel Sambuc 
806433d6423SLionel Sambuc 		switch (OP(scan)) {
807433d6423SLionel Sambuc 		case BOL:
808433d6423SLionel Sambuc 			if (reginput != regbol)
809433d6423SLionel Sambuc 				return(0);
810433d6423SLionel Sambuc 			break;
811433d6423SLionel Sambuc 		case EOL:
812433d6423SLionel Sambuc 			if (*reginput != '\0')
813433d6423SLionel Sambuc 				return(0);
814433d6423SLionel Sambuc 			break;
815433d6423SLionel Sambuc 		case ANY:
816433d6423SLionel Sambuc 			if (*reginput == '\0')
817433d6423SLionel Sambuc 				return(0);
818433d6423SLionel Sambuc 			reginput++;
819433d6423SLionel Sambuc 			break;
820433d6423SLionel Sambuc 		case EXACTLY: {
821433d6423SLionel Sambuc 				register int len;
822433d6423SLionel Sambuc 				register unsigned char *opnd;
823433d6423SLionel Sambuc 
824433d6423SLionel Sambuc 				opnd = OPERAND(scan);
825433d6423SLionel Sambuc 				/* Inline the first character, for speed. */
826433d6423SLionel Sambuc 				if (*opnd != *reginput)
827433d6423SLionel Sambuc 					return(0);
828433d6423SLionel Sambuc 				len = strlen((char *)opnd);
829433d6423SLionel Sambuc 				if (len > 1 && strncmp((char *)opnd,
830433d6423SLionel Sambuc 				   (char *)reginput, len)
831433d6423SLionel Sambuc 				!= 0)
832433d6423SLionel Sambuc 					return(0);
833433d6423SLionel Sambuc 				reginput += len;
834433d6423SLionel Sambuc 			}
835433d6423SLionel Sambuc 			break;
836433d6423SLionel Sambuc 		case ANYOF:
837433d6423SLionel Sambuc 			if (*reginput == '\0'
838433d6423SLionel Sambuc 			|| strchr((char *)OPERAND(scan), *reginput) == NULL)
839433d6423SLionel Sambuc 				return(0);
840433d6423SLionel Sambuc 			reginput++;
841433d6423SLionel Sambuc 			break;
842433d6423SLionel Sambuc 		case ANYBUT:
843433d6423SLionel Sambuc 			if (*reginput == '\0'
844433d6423SLionel Sambuc 			|| strchr((char *)OPERAND(scan), *reginput) != NULL)
845433d6423SLionel Sambuc 				return(0);
846433d6423SLionel Sambuc 			reginput++;
847433d6423SLionel Sambuc 			break;
848433d6423SLionel Sambuc 		case NOTHING:
849433d6423SLionel Sambuc 			break;
850433d6423SLionel Sambuc 		case BACK:
851433d6423SLionel Sambuc 			break;
852433d6423SLionel Sambuc 		case OPEN+1:
853433d6423SLionel Sambuc 		case OPEN+2:
854433d6423SLionel Sambuc 		case OPEN+3:
855433d6423SLionel Sambuc 		case OPEN+4:
856433d6423SLionel Sambuc 		case OPEN+5:
857433d6423SLionel Sambuc 		case OPEN+6:
858433d6423SLionel Sambuc 		case OPEN+7:
859433d6423SLionel Sambuc 		case OPEN+8:
860433d6423SLionel Sambuc 		case OPEN+9: {
861433d6423SLionel Sambuc 				register int no;
862433d6423SLionel Sambuc 				register unsigned char *save;
863433d6423SLionel Sambuc 
864433d6423SLionel Sambuc 				no = OP(scan) - OPEN;
865433d6423SLionel Sambuc 				save = reginput;
866433d6423SLionel Sambuc 
867433d6423SLionel Sambuc 				if (regmatch(next)) {
868433d6423SLionel Sambuc 					/*
869433d6423SLionel Sambuc 					 * Don't set startp if some later
870433d6423SLionel Sambuc 					 * invocation of the same parentheses
871433d6423SLionel Sambuc 					 * already has.
872433d6423SLionel Sambuc 					 */
873433d6423SLionel Sambuc 					if (regstartp[no] == NULL)
874433d6423SLionel Sambuc 						regstartp[no] = save;
875433d6423SLionel Sambuc 					return(1);
876433d6423SLionel Sambuc 				} else
877433d6423SLionel Sambuc 					return(0);
878433d6423SLionel Sambuc 			}
879433d6423SLionel Sambuc 			break;
880433d6423SLionel Sambuc 		case CLOSE+1:
881433d6423SLionel Sambuc 		case CLOSE+2:
882433d6423SLionel Sambuc 		case CLOSE+3:
883433d6423SLionel Sambuc 		case CLOSE+4:
884433d6423SLionel Sambuc 		case CLOSE+5:
885433d6423SLionel Sambuc 		case CLOSE+6:
886433d6423SLionel Sambuc 		case CLOSE+7:
887433d6423SLionel Sambuc 		case CLOSE+8:
888433d6423SLionel Sambuc 		case CLOSE+9: {
889433d6423SLionel Sambuc 				register int no;
890433d6423SLionel Sambuc 				register unsigned char *save;
891433d6423SLionel Sambuc 
892433d6423SLionel Sambuc 				no = OP(scan) - CLOSE;
893433d6423SLionel Sambuc 				save = reginput;
894433d6423SLionel Sambuc 
895433d6423SLionel Sambuc 				if (regmatch(next)) {
896433d6423SLionel Sambuc 					/*
897433d6423SLionel Sambuc 					 * Don't set endp if some later
898433d6423SLionel Sambuc 					 * invocation of the same parentheses
899433d6423SLionel Sambuc 					 * already has.
900433d6423SLionel Sambuc 					 */
901433d6423SLionel Sambuc 					if (regendp[no] == NULL)
902433d6423SLionel Sambuc 						regendp[no] = save;
903433d6423SLionel Sambuc 					return(1);
904433d6423SLionel Sambuc 				} else
905433d6423SLionel Sambuc 					return(0);
906433d6423SLionel Sambuc 			}
907433d6423SLionel Sambuc 			break;
908433d6423SLionel Sambuc 		case BRANCH: {
909433d6423SLionel Sambuc 				register unsigned char *save;
910433d6423SLionel Sambuc 
911433d6423SLionel Sambuc 				if (OP(next) != BRANCH)		/* No choice. */
912433d6423SLionel Sambuc 					next = OPERAND(scan);	/* Avoid recursion. */
913433d6423SLionel Sambuc 				else {
914433d6423SLionel Sambuc 					do {
915433d6423SLionel Sambuc 						save = reginput;
916433d6423SLionel Sambuc 						if (regmatch(OPERAND(scan)))
917433d6423SLionel Sambuc 							return(1);
918433d6423SLionel Sambuc 						reginput = save;
919433d6423SLionel Sambuc 						scan = regnext(scan);
920433d6423SLionel Sambuc 					} while (scan != NULL && OP(scan) == BRANCH);
921433d6423SLionel Sambuc 					return(0);
922433d6423SLionel Sambuc 					/* NOTREACHED */
923433d6423SLionel Sambuc 				}
924433d6423SLionel Sambuc 			}
925433d6423SLionel Sambuc 			break;
926433d6423SLionel Sambuc 		case STAR:
927433d6423SLionel Sambuc 		case PLUS: {
928433d6423SLionel Sambuc 				register unsigned char nextch;
929433d6423SLionel Sambuc 				register int no;
930433d6423SLionel Sambuc 				register unsigned char *save;
931433d6423SLionel Sambuc 				register int min;
932433d6423SLionel Sambuc 
933433d6423SLionel Sambuc 				/*
934433d6423SLionel Sambuc 				 * Lookahead to avoid useless match attempts
935433d6423SLionel Sambuc 				 * when we know what character comes next.
936433d6423SLionel Sambuc 				 */
937433d6423SLionel Sambuc 				nextch = '\0';
938433d6423SLionel Sambuc 				if (OP(next) == EXACTLY)
939433d6423SLionel Sambuc 					nextch = *OPERAND(next);
940433d6423SLionel Sambuc 				min = (OP(scan) == STAR) ? 0 : 1;
941433d6423SLionel Sambuc 				save = reginput;
942433d6423SLionel Sambuc 				no = regrepeat(OPERAND(scan));
943433d6423SLionel Sambuc 				while (no >= min) {
944433d6423SLionel Sambuc 					/* If it could work, try it. */
945433d6423SLionel Sambuc 					if (nextch == '\0' || *reginput == nextch)
946433d6423SLionel Sambuc 						if (regmatch(next))
947433d6423SLionel Sambuc 							return(1);
948433d6423SLionel Sambuc 					/* Couldn't or didn't -- back up. */
949433d6423SLionel Sambuc 					no--;
950433d6423SLionel Sambuc 					reginput = save + no;
951433d6423SLionel Sambuc 				}
952433d6423SLionel Sambuc 				return(0);
953433d6423SLionel Sambuc 			}
954433d6423SLionel Sambuc 			break;
955433d6423SLionel Sambuc 		case END:
956433d6423SLionel Sambuc 			return(1);	/* Success! */
957433d6423SLionel Sambuc 			break;
958433d6423SLionel Sambuc 		default:
959433d6423SLionel Sambuc 			regerror("memory corruption");
960433d6423SLionel Sambuc 			return(0);
961433d6423SLionel Sambuc 			break;
962433d6423SLionel Sambuc 		}
963433d6423SLionel Sambuc 
964433d6423SLionel Sambuc 		scan = next;
965433d6423SLionel Sambuc 	}
966433d6423SLionel Sambuc 
967433d6423SLionel Sambuc 	/*
968433d6423SLionel Sambuc 	 * We get here only if there's trouble -- normally "case END" is
969433d6423SLionel Sambuc 	 * the terminating point.
970433d6423SLionel Sambuc 	 */
971433d6423SLionel Sambuc 	regerror("corrupted pointers");
972433d6423SLionel Sambuc 	return(0);
973433d6423SLionel Sambuc }
974433d6423SLionel Sambuc 
975433d6423SLionel Sambuc /*
976433d6423SLionel Sambuc  - regrepeat - repeatedly match something simple, report how many
977433d6423SLionel Sambuc  */
regrepeat(unsigned char * p)978d9494baaSJacob Adams STATIC int regrepeat(unsigned char *p) {
979433d6423SLionel Sambuc 	register int count = 0;
980433d6423SLionel Sambuc 	register unsigned char *scan;
981433d6423SLionel Sambuc 	register unsigned char *opnd;
982433d6423SLionel Sambuc 
983433d6423SLionel Sambuc 	scan = reginput;
984433d6423SLionel Sambuc 	opnd = OPERAND(p);
985433d6423SLionel Sambuc 	switch (OP(p)) {
986433d6423SLionel Sambuc 	case ANY:
987433d6423SLionel Sambuc 		count = strlen((char *)scan);
988433d6423SLionel Sambuc 		scan += count;
989433d6423SLionel Sambuc 		break;
990433d6423SLionel Sambuc 	case EXACTLY:
991433d6423SLionel Sambuc 		while (*opnd == *scan) {
992433d6423SLionel Sambuc 			count++;
993433d6423SLionel Sambuc 			scan++;
994433d6423SLionel Sambuc 		}
995433d6423SLionel Sambuc 		break;
996433d6423SLionel Sambuc 	case ANYOF:
997433d6423SLionel Sambuc 		while (*scan != '\0' && strchr((char *)opnd, *scan) != NULL) {
998433d6423SLionel Sambuc 			count++;
999433d6423SLionel Sambuc 			scan++;
1000433d6423SLionel Sambuc 		}
1001433d6423SLionel Sambuc 		break;
1002433d6423SLionel Sambuc 	case ANYBUT:
1003433d6423SLionel Sambuc 		while (*scan != '\0' && strchr((char *)opnd, *scan) == NULL) {
1004433d6423SLionel Sambuc 			count++;
1005433d6423SLionel Sambuc 			scan++;
1006433d6423SLionel Sambuc 		}
1007433d6423SLionel Sambuc 		break;
1008433d6423SLionel Sambuc 	default:		/* Oh dear.  Called inappropriately. */
1009433d6423SLionel Sambuc 		regerror("internal foulup");
1010433d6423SLionel Sambuc 		count = 0;	/* Best compromise. */
1011433d6423SLionel Sambuc 		break;
1012433d6423SLionel Sambuc 	}
1013433d6423SLionel Sambuc 	reginput = scan;
1014433d6423SLionel Sambuc 
1015433d6423SLionel Sambuc 	return(count);
1016433d6423SLionel Sambuc }
1017433d6423SLionel Sambuc 
1018433d6423SLionel Sambuc /*
1019433d6423SLionel Sambuc  - regnext - dig the "next" pointer out of a node
1020433d6423SLionel Sambuc  */
regnext(register unsigned char * p)1021d9494baaSJacob Adams STATIC unsigned char *regnext(register unsigned char *p) {
1022433d6423SLionel Sambuc 	register int offset;
1023433d6423SLionel Sambuc 
1024433d6423SLionel Sambuc 	if (p == &regdummy)
1025433d6423SLionel Sambuc 		return(NULL);
1026433d6423SLionel Sambuc 
1027433d6423SLionel Sambuc 	offset = NEXT(p);
1028433d6423SLionel Sambuc 	if (offset == 0)
1029433d6423SLionel Sambuc 		return(NULL);
1030433d6423SLionel Sambuc 
1031433d6423SLionel Sambuc 	if (OP(p) == BACK)
1032433d6423SLionel Sambuc 		return(p-offset);
1033433d6423SLionel Sambuc 	else
1034433d6423SLionel Sambuc 		return(p+offset);
1035433d6423SLionel Sambuc }
1036433d6423SLionel Sambuc 
1037433d6423SLionel Sambuc #ifdef DEBUG
1038433d6423SLionel Sambuc 
1039d9494baaSJacob Adams STATIC unsigned char *regprop(void);
1040433d6423SLionel Sambuc 
1041433d6423SLionel Sambuc /*
1042433d6423SLionel Sambuc  - regdump - dump a regexp onto stdout in vaguely comprehensible form
1043433d6423SLionel Sambuc  */
regdump(regexp r)1044d9494baaSJacob Adams void regdump(regexp r) {
1045433d6423SLionel Sambuc 	register unsigned char *s;
1046433d6423SLionel Sambuc 	register unsigned char op = EXACTLY;	/* Arbitrary non-END op. */
1047433d6423SLionel Sambuc 	register unsigned char *next;
1048433d6423SLionel Sambuc 	extern char *strchr();
1049433d6423SLionel Sambuc 
1050433d6423SLionel Sambuc 
1051433d6423SLionel Sambuc 	s = r->program + 1;
1052433d6423SLionel Sambuc 	while (op != END) {	/* While that wasn't END last time... */
1053433d6423SLionel Sambuc 		op = OP(s);
1054433d6423SLionel Sambuc 		printf("%2d%s", s-r->program, regprop(s));	/* Where, what. */
1055433d6423SLionel Sambuc 		next = regnext(s);
1056433d6423SLionel Sambuc 		if (next == NULL)		/* Next ptr. */
1057433d6423SLionel Sambuc 			printf("(0)");
1058433d6423SLionel Sambuc 		else
1059433d6423SLionel Sambuc 			printf("(%d)", (s-r->program)+(next-s));
1060433d6423SLionel Sambuc 		s += 3;
1061433d6423SLionel Sambuc 		if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
1062433d6423SLionel Sambuc 			/* Literal string, where present. */
1063433d6423SLionel Sambuc 			while (*s != '\0') {
1064433d6423SLionel Sambuc 				putchar(*s);
1065433d6423SLionel Sambuc 				s++;
1066433d6423SLionel Sambuc 			}
1067433d6423SLionel Sambuc 			s++;
1068433d6423SLionel Sambuc 		}
1069433d6423SLionel Sambuc 		putchar('\n');
1070433d6423SLionel Sambuc 	}
1071433d6423SLionel Sambuc 
1072433d6423SLionel Sambuc 	/* Header fields of interest. */
1073433d6423SLionel Sambuc 	if (r->regstart != '\0')
1074433d6423SLionel Sambuc 		printf("start `%c' ", r->regstart);
1075433d6423SLionel Sambuc 	if (r->reganch)
1076433d6423SLionel Sambuc 		printf("anchored ");
1077433d6423SLionel Sambuc 	if (r->regmust != NULL)
1078433d6423SLionel Sambuc 		printf("must have \"%s\"", r->regmust);
1079433d6423SLionel Sambuc 	printf("\n");
1080433d6423SLionel Sambuc }
1081433d6423SLionel Sambuc 
1082433d6423SLionel Sambuc /*
1083433d6423SLionel Sambuc  - regprop - printable representation of opcode
1084433d6423SLionel Sambuc  */
1085c6748a4aSJacob Adams STATIC unsigned char regprop_buf[50];
1086d9494baaSJacob Adams 
regprop(unsigned char * op)1087d9494baaSJacob Adams STATIC unsigned char *regprop(unsigned char *op) {
1088433d6423SLionel Sambuc 	register unsigned char *p;
1089433d6423SLionel Sambuc 
1090c6748a4aSJacob Adams 	(void) strcpy(regprop_buf, ":");
1091433d6423SLionel Sambuc 
1092433d6423SLionel Sambuc 	switch (OP(op)) {
1093433d6423SLionel Sambuc 	case BOL:
1094433d6423SLionel Sambuc 		p = "BOL";
1095433d6423SLionel Sambuc 		break;
1096433d6423SLionel Sambuc 	case EOL:
1097433d6423SLionel Sambuc 		p = "EOL";
1098433d6423SLionel Sambuc 		break;
1099433d6423SLionel Sambuc 	case ANY:
1100433d6423SLionel Sambuc 		p = "ANY";
1101433d6423SLionel Sambuc 		break;
1102433d6423SLionel Sambuc 	case ANYOF:
1103433d6423SLionel Sambuc 		p = "ANYOF";
1104433d6423SLionel Sambuc 		break;
1105433d6423SLionel Sambuc 	case ANYBUT:
1106433d6423SLionel Sambuc 		p = "ANYBUT";
1107433d6423SLionel Sambuc 		break;
1108433d6423SLionel Sambuc 	case BRANCH:
1109433d6423SLionel Sambuc 		p = "BRANCH";
1110433d6423SLionel Sambuc 		break;
1111433d6423SLionel Sambuc 	case EXACTLY:
1112433d6423SLionel Sambuc 		p = "EXACTLY";
1113433d6423SLionel Sambuc 		break;
1114433d6423SLionel Sambuc 	case NOTHING:
1115433d6423SLionel Sambuc 		p = "NOTHING";
1116433d6423SLionel Sambuc 		break;
1117433d6423SLionel Sambuc 	case BACK:
1118433d6423SLionel Sambuc 		p = "BACK";
1119433d6423SLionel Sambuc 		break;
1120433d6423SLionel Sambuc 	case END:
1121433d6423SLionel Sambuc 		p = "END";
1122433d6423SLionel Sambuc 		break;
1123433d6423SLionel Sambuc 	case OPEN+1:
1124433d6423SLionel Sambuc 	case OPEN+2:
1125433d6423SLionel Sambuc 	case OPEN+3:
1126433d6423SLionel Sambuc 	case OPEN+4:
1127433d6423SLionel Sambuc 	case OPEN+5:
1128433d6423SLionel Sambuc 	case OPEN+6:
1129433d6423SLionel Sambuc 	case OPEN+7:
1130433d6423SLionel Sambuc 	case OPEN+8:
1131433d6423SLionel Sambuc 	case OPEN+9:
1132c6748a4aSJacob Adams 		sprintf(regprop_buf+strlen(regprop_buf), "OPEN%d", OP(op)-OPEN);
1133433d6423SLionel Sambuc 		p = NULL;
1134433d6423SLionel Sambuc 		break;
1135433d6423SLionel Sambuc 	case CLOSE+1:
1136433d6423SLionel Sambuc 	case CLOSE+2:
1137433d6423SLionel Sambuc 	case CLOSE+3:
1138433d6423SLionel Sambuc 	case CLOSE+4:
1139433d6423SLionel Sambuc 	case CLOSE+5:
1140433d6423SLionel Sambuc 	case CLOSE+6:
1141433d6423SLionel Sambuc 	case CLOSE+7:
1142433d6423SLionel Sambuc 	case CLOSE+8:
1143433d6423SLionel Sambuc 	case CLOSE+9:
1144c6748a4aSJacob Adams 		sprintf(regprop_buf+strlen(regprop_buf), "CLOSE%d", OP(op)-CLOSE);
1145433d6423SLionel Sambuc 		p = NULL;
1146433d6423SLionel Sambuc 		break;
1147433d6423SLionel Sambuc 	case STAR:
1148433d6423SLionel Sambuc 		p = "STAR";
1149433d6423SLionel Sambuc 		break;
1150433d6423SLionel Sambuc 	case PLUS:
1151433d6423SLionel Sambuc 		p = "PLUS";
1152433d6423SLionel Sambuc 		break;
1153433d6423SLionel Sambuc 	default:
1154433d6423SLionel Sambuc 		regerror("corrupted opcode");
1155433d6423SLionel Sambuc 		break;
1156433d6423SLionel Sambuc 	}
1157433d6423SLionel Sambuc 	if (p != NULL)
1158c6748a4aSJacob Adams 		(void) strcat(regprop_buf, p);
1159c6748a4aSJacob Adams 	return(regprop_buf);
1160433d6423SLionel Sambuc }
1161433d6423SLionel Sambuc #endif
1162433d6423SLionel Sambuc 
1163433d6423SLionel Sambuc /*
1164433d6423SLionel Sambuc  * The following is provided for those people who do not have strcspn() in
1165433d6423SLionel Sambuc  * their C libraries.  They should get off their butts and do something
1166433d6423SLionel Sambuc  * about it; at least one public-domain implementation of those (highly
1167433d6423SLionel Sambuc  * useful) string routines has been published on Usenet.
1168433d6423SLionel Sambuc  */
1169433d6423SLionel Sambuc #ifdef STRCSPN
1170433d6423SLionel Sambuc /*
1171433d6423SLionel Sambuc  * strcspn - find length of initial segment of s1 consisting entirely
1172433d6423SLionel Sambuc  * of characters not from s2
1173433d6423SLionel Sambuc  */
1174433d6423SLionel Sambuc 
strcspn(unsigned char * s1,unsigned char * s2)1175d9494baaSJacob Adams STATIC int strcspn(unsigned char *s1, unsigned char *s2) {
1176433d6423SLionel Sambuc 	register unsigned char *scan1;
1177433d6423SLionel Sambuc 	register unsigned char *scan2;
1178433d6423SLionel Sambuc 	register int count;
1179433d6423SLionel Sambuc 
1180433d6423SLionel Sambuc 	count = 0;
1181433d6423SLionel Sambuc 	for (scan1 = s1; *scan1 != '\0'; scan1++) {
1182433d6423SLionel Sambuc 		for (scan2 = s2; *scan2 != '\0';)	/* ++ moved down. */
1183433d6423SLionel Sambuc 			if (*scan1 == *scan2++)
1184433d6423SLionel Sambuc 				return(count);
1185433d6423SLionel Sambuc 		count++;
1186433d6423SLionel Sambuc 	}
1187433d6423SLionel Sambuc 	return(count);
1188433d6423SLionel Sambuc }
1189433d6423SLionel Sambuc #endif
1190