1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * The contents of this file are subject to the Mozilla Public License Version
5  * 1.1 (the "License"); you may not use this file except in compliance with
6  * the License. You may obtain a copy of the License at
7  * http://www.mozilla.org/MPL/
8  *
9  * Software distributed under the License is distributed on an "AS IS" basis,
10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11  * for the specific language governing rights and limitations under the
12  * License.
13  *
14  * The Original Code is Mozilla Communicator client code, released
15  * March 31, 1998.
16  *
17  * The Initial Developer of the Original Code is
18  * Netscape Communications Corporation.
19  * Portions created by the Initial Developer are Copyright (C) 1998-1999
20  * the Initial Developer. All Rights Reserved.
21  *
22  * Contributor(s):
23  *
24  * Alternatively, the contents of this file may be used under the terms of
25  * either of the GNU General Public License Version 2 or later (the "GPL"),
26  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27  * in which case the provisions of the GPL or the LGPL are applicable instead
28  * of those above. If you wish to allow use of your version of this file only
29  * under the terms of either the GPL or the LGPL, and not to allow others to
30  * use your version of this file under the terms of the MPL, indicate your
31  * decision by deleting the provisions above and replace them with the notice
32  * and other provisions required by the GPL or the LGPL. If you do not delete
33  * the provisions above, a recipient may use your version of this file under
34  * the terms of any one of the MPL, the GPL or the LGPL.
35  *
36  * ***** END LICENSE BLOCK ***** */
37 #include "ldap-int.h"
38 #if defined( macintosh ) || defined( DOS ) || defined( _WINDOWS ) || defined( NEED_BSDREGEX ) || defined( XP_OS2)
39 #include "regex.h"
40 
41 /*
42  * regex - Regular expression pattern matching  and replacement
43  *
44  * By:  Ozan S. Yigit (oz)
45  *      Dept. of Computer Science
46  *      York University
47  *
48  * These routines are the PUBLIC DOMAIN equivalents of regex
49  * routines as found in 4.nBSD UN*X, with minor extensions.
50  *
51  * These routines are derived from various implementations found
52  * in software tools books, and Conroy's grep. They are NOT derived
53  * from licensed/restricted software.
54  * For more interesting/academic/complicated implementations,
55  * see Henry Spencer's regexp routines, or GNU Emacs pattern
56  * matching module.
57  *
58  * Use the actual CCL code in the CLO
59  * section of pmatch. No need for a recursive
60  * pmatch call.
61  *
62  * Use a bitmap table to set char bits in an
63  * 8-bit chunk.
64  *
65  * Interfaces:
66  *      re_comp:        compile a regular expression into a NFA.
67  *
68  *			char *re_comp(s)
69  *			char *s;
70  *
71  *      re_exec:        execute the NFA to match a pattern.
72  *
73  *			int re_exec(s)
74  *			char *s;
75  *
76  *	re_modw		change re_exec's understanding of what a "word"
77  *			looks like (for \< and \>) by adding into the
78  *			hidden word-syntax table.
79  *
80  *			void re_modw(s)
81  *			char *s;
82  *
83  *      re_subs:	substitute the matched portions in a new string.
84  *
85  *			int re_subs(src, dst)
86  *			char *src;
87  *			char *dst;
88  *
89  *	re_fail:	failure routine for re_exec.
90  *
91  *			void re_fail(msg, op)
92  *			char *msg;
93  *			char op;
94  *
95  * Regular Expressions:
96  *
97  *      [1]     char    matches itself, unless it is a special
98  *                      character (metachar): . \ [ ] * + ^ $
99  *
100  *      [2]     .       matches any character.
101  *
102  *      [3]     \       matches the character following it, except
103  *			when followed by a left or right round bracket,
104  *			a digit 1 to 9 or a left or right angle bracket.
105  *			(see [7], [8] and [9])
106  *			It is used as an escape character for all
107  *			other meta-characters, and itself. When used
108  *			in a set ([4]), it is treated as an ordinary
109  *			character.
110  *
111  *      [4]     [set]   matches one of the characters in the set.
112  *                      If the first character in the set is "^",
113  *                      it matches a character NOT in the set, i.e.
114  *			complements the set. A shorthand S-E is
 *			used to specify a set of characters S up to
116  *			E, inclusive. The special characters "]" and
117  *			"-" have no special meaning if they appear
118  *			as the first chars in the set.
119  *                      examples:        match:
120  *
121  *                              [a-z]    any lowercase alpha
122  *
123  *                              [^]-]    any char except ] and -
124  *
125  *                              [^A-Z]   any char except uppercase
126  *                                       alpha
127  *
128  *                              [a-zA-Z] any alpha
129  *
130  *      [5]     *       any regular expression form [1] to [4], followed by
131  *                      closure char (*) matches zero or more matches of
132  *                      that form.
133  *
134  *      [6]     +       same as [5], except it matches one or more.
135  *
136  *      [7]             a regular expression in the form [1] to [10], enclosed
137  *                      as \(form\) matches what form matches. The enclosure
138  *                      creates a set of tags, used for [8] and for
 *                      pattern substitution. The tagged forms are numbered
140  *			starting from 1.
141  *
142  *      [8]             a \ followed by a digit 1 to 9 matches whatever a
143  *                      previously tagged regular expression ([7]) matched.
144  *
145  *	[9]	\<	a regular expression starting with a \< construct
146  *		\>	and/or ending with a \> construct, restricts the
147  *			pattern matching to the beginning of a word, and/or
148  *			the end of a word. A word is defined to be a character
149  *			string beginning and/or ending with the characters
150  *			A-Z a-z 0-9 and _. It must also be preceded and/or
151  *			followed by any character outside those mentioned.
152  *
153  *      [10]            a composite regular expression xy where x and y
154  *                      are in the form [1] to [10] matches the longest
155  *                      match of x followed by a match for y.
156  *
157  *      [11]	^	a regular expression starting with a ^ character
158  *		$	and/or ending with a $ character, restricts the
159  *                      pattern matching to the beginning of the line,
160  *                      or the end of line. [anchors] Elsewhere in the
161  *			pattern, ^ and $ are treated as ordinary characters.
162  *
163  *
164  * Acknowledgements:
165  *
166  *	HCR's Hugh Redelmeier has been most helpful in various
167  *	stages of development. He convinced me to include BOW
168  *	and EOW constructs, originally invented by Rob Pike at
169  *	the University of Toronto.
170  *
171  * References:
172  *              Software tools			Kernighan & Plauger
173  *              Software tools in Pascal        Kernighan & Plauger
174  *              Grep [rsx-11 C dist]            David Conroy
175  *		ed - text editor		Un*x Programmer's Manual
176  *		Advanced editing on Un*x	B. W. Kernighan
177  *		RegExp routines			Henry Spencer
178  *
179  * Notes:
180  *
181  *	This implementation uses a bit-set representation for character
182  *	classes for speed and compactness. Each character is represented
183  *	by one bit in a 128-bit block. Thus, CCL always takes a
184  *	constant 16 bytes in the internal nfa, and re_exec does a single
185  *	bit comparison to locate the character in the set.
186  *
187  * Examples:
188  *
189  *	pattern:	foo*.*
190  *	compile:	CHR f CHR o CLO CHR o END CLO ANY END END
191  *	matches:	fo foo fooo foobar fobar foxx ...
192  *
193  *	pattern:	fo[ob]a[rz]
194  *	compile:	CHR f CHR o CCL bitset CHR a CCL bitset END
195  *	matches:	fobar fooar fobaz fooaz
196  *
197  *	pattern:	foo\\+
198  *	compile:	CHR f CHR o CHR o CHR \ CLO CHR \ END END
199  *	matches:	foo\ foo\\ foo\\\  ...
200  *
201  *	pattern:	\(foo\)[1-3]\1	(same as foo[1-3]foo)
202  *	compile:	BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
203  *	matches:	foo1foo foo2foo foo3foo
204  *
205  *	pattern:	\(fo.*\)-\1
206  *	compile:	BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
207  *	matches:	foo-foo fo-fo fob-fob foobar-foobar ...
208  */
209 
#define MAXNFA  1024		/* size of nfa[], in bytes            */
#define MAXTAG  10		/* size of tagstk[], bopat[], eopat[] */

/* values of sta: did the last re_comp leave a usable automaton? */
#define OKP     1
#define NOP     0

/*
 * Opcodes stored in nfa[]. Operands, where present, follow the
 * opcode byte directly.
 */
#define CHR     1		/* CHR c      : the literal char c       */
#define ANY     2		/* ANY        : any single character     */
#define CCL     3		/* CCL bitset : BITBLK-byte char class   */
#define BOL     4		/* BOL        : beginning of line        */
#define EOL     5		/* EOL        : end of line              */
#define BOT     6		/* BOT n      : beginning of tag n       */
#define EOT     7		/* EOT n      : end of tag n             */
#define BOW	8		/* BOW        : beginning of word        */
#define EOW	9		/* EOW        : end of word              */
#define REF     10		/* REF n      : back-reference to tag n  */
#define CLO     11		/* CLO x END  : closure of ANY/CHR/CCL x */

#define END     0		/* terminates the automaton / a closure  */

/*
 * The following defines are not meant to be changeable.
 * They are for readability only.
 */
#define MAXCHR	128		/* characters covered by a class         */
#define CHRBIT	8		/* class bits held per byte              */
/* Parenthesized so the quotient survives any surrounding expression. */
#define BITBLK	(MAXCHR/CHRBIT)	/* bytes in one class bitset             */
#define BLKIND	0170		/* bits of a char that select the byte   */
#define BITIND	07		/* bits of a char that select the bit    */

#define ASCIIB	0177		/* 7-bit mask                            */

/* Plain char, on the other hand, may be signed or unsigned; it depends on
 * the platform and perhaps a compiler option.  A hard fact of life, in C.
 *
 * 6-April-1999 mcs@netscape.com: replaced CHAR with REGEXCHAR to avoid
 *              conflicts with system types on Win32.   Changed typedef
 *              for REGEXCHAR to always be unsigned, which seems right.
 */
typedef unsigned char REGEXCHAR;

static int  tagstk[MAXTAG];             /* subpat tag stack..*/
static REGEXCHAR nfa[MAXNFA];		/* automaton..       */
static int  sta = NOP;               	/* status of lastpat */

static REGEXCHAR bittab[BITBLK];	/* bit table for CCL */
					/* pre-set bits...   */
static REGEXCHAR bitarr[] = {1,2,4,8,16,32,64,128};
258 
259 static void
chset(REGEXCHAR c)260 chset(REGEXCHAR c)
261 {
262 	bittab[((c) & (unsigned)BLKIND) >> 3] |= bitarr[(c) & BITIND];
263 }
264 
265 #define badpat(x)	(*nfa = END, x)
266 #define store(x)	*mp++ = x
267 
268 char *
269 LDAP_CALL
re_comp(char * pat)270 re_comp( char *pat )
271 {
272 	register REGEXCHAR *p;          /* pattern pointer   */
273 	register REGEXCHAR *mp=nfa;     /* nfa pointer       */
274 	register REGEXCHAR *lp;         /* saved pointer..   */
275 	register REGEXCHAR *sp=nfa;     /* another one..     */
276 
277 	register int tagi = 0;          /* tag stack index   */
278 	register int tagc = 1;          /* actual tag count  */
279 
280 	register int n;
281 	register REGEXCHAR mask;	/* xor mask -CCL/NCL */
282 	int c1, c2;
283 
284 	if (!pat || !*pat) {
285 		if (sta) {
286 			return 0;
287 		} else {
288 			return badpat("No previous regular expression");
289 		}
290 	}
291 	sta = NOP;
292 
293 	for (p = (REGEXCHAR*)pat; *p; p++) {
294 		lp = mp;
295 		switch(*p) {
296 
297 		case '.':               /* match any char..  */
298 			store(ANY);
299 			break;
300 
301 		case '^':               /* match beginning.. */
302 			if (p == (REGEXCHAR*)pat)
303 				store(BOL);
304 			else {
305 				store(CHR);
306 				store(*p);
307 			}
308 			break;
309 
310 		case '$':               /* match endofline.. */
311 			if (!*(p+1))
312 				store(EOL);
313 			else {
314 				store(CHR);
315 				store(*p);
316 			}
317 			break;
318 
319 		case '[':               /* match char class..*/
320 			store(CCL);
321 
322 			if (*++p == '^') {
323 				mask = 0377;
324 				p++;
325 			}
326 			else
327 				mask = 0;
328 
329 			if (*p == '-')		/* real dash */
330 				chset(*p++);
331 			if (*p == ']')		/* real brac */
332 				chset(*p++);
333 			while (*p && *p != ']') {
334 				if (*p == '-' && *(p+1) && *(p+1) != ']') {
335 					p++;
336 					c1 = *(p-2) + 1;
337 					c2 = *p++;
338 					while (c1 <= c2)
339 						chset((REGEXCHAR)c1++);
340 				}
341 #ifdef EXTEND
342 				else if (*p == '\\' && *(p+1)) {
343 					p++;
344 					chset(*p++);
345 				}
346 #endif
347 				else
348 					chset(*p++);
349 			}
350 			if (!*p)
351 				return badpat("Missing ]");
352 
353 			for (n = 0; n < BITBLK; bittab[n++] = (REGEXCHAR) 0)
354 				store(mask ^ bittab[n]);
355 
356 			break;
357 
358 		case '*':               /* match 0 or more.. */
359 		case '+':               /* match 1 or more.. */
360 			if (p == (REGEXCHAR*)pat)
361 				return badpat("Empty closure");
362 			lp = sp;		/* previous opcode */
363 			if (*lp == CLO)		/* equivalence..   */
364 				break;
365 			switch(*lp) {
366 
367 			case BOL:
368 			case BOT:
369 			case EOT:
370 			case BOW:
371 			case EOW:
372 			case REF:
373 				return badpat("Illegal closure");
374 			default:
375 				break;
376 			}
377 
378 			if (*p == '+')
379 				for (sp = mp; lp < sp; lp++)
380 					store(*lp);
381 
382 			store(END);
383 			store(END);
384 			sp = mp;
385 			while (--mp > lp)
386 				*mp = mp[-1];
387 			store(CLO);
388 			mp = sp;
389 			break;
390 
391 		case '\\':              /* tags, backrefs .. */
392 			switch(*++p) {
393 
394 			case '(':
395 				if (tagc < MAXTAG) {
396 					tagstk[++tagi] = tagc;
397 					store(BOT);
398 					store(tagc++);
399 				}
400 				else
401 					return badpat("Too many \\(\\) pairs");
402 				break;
403 			case ')':
404 				if (*sp == BOT)
405 					return badpat("Null pattern inside \\(\\)");
406 				if (tagi > 0) {
407 					store(EOT);
408 					store(tagstk[tagi--]);
409 				}
410 				else
411 					return badpat("Unmatched \\)");
412 				break;
413 			case '<':
414 				store(BOW);
415 				break;
416 			case '>':
417 				if (*sp == BOW)
418 					return badpat("Null pattern inside \\<\\>");
419 				store(EOW);
420 				break;
421 			case '1':
422 			case '2':
423 			case '3':
424 			case '4':
425 			case '5':
426 			case '6':
427 			case '7':
428 			case '8':
429 			case '9':
430 				n = *p-'0';
431 				if (tagi > 0 && tagstk[tagi] == n)
432 					return badpat("Cyclical reference");
433 				if (tagc > n) {
434 					store(REF);
435 					store(n);
436 				}
437 				else
438 					return badpat("Undetermined reference");
439 				break;
440 #ifdef EXTEND
441 			case 'b':
442 				store(CHR);
443 				store('\b');
444 				break;
445 			case 'n':
446 				store(CHR);
447 				store('\n');
448 				break;
449 			case 'f':
450 				store(CHR);
451 				store('\f');
452 				break;
453 			case 'r':
454 				store(CHR);
455 				store('\r');
456 				break;
457 			case 't':
458 				store(CHR);
459 				store('\t');
460 				break;
461 #endif
462 			default:
463 				store(CHR);
464 				store(*p);
465 			}
466 			break;
467 
468 		default :               /* an ordinary char  */
469 			store(CHR);
470 			store(*p);
471 			break;
472 		}
473 		sp = lp;
474 	}
475 	if (tagi > 0)
476 		return badpat("Unmatched \\(");
477 	store(END);
478 	sta = OKP;
479 	return 0;
480 }
481 
482 
483 static REGEXCHAR *bol;
484 static REGEXCHAR *bopat[MAXTAG];
485 static REGEXCHAR *eopat[MAXTAG];
486 #ifdef NEEDPROTOS
487 static REGEXCHAR *pmatch( REGEXCHAR *lp, REGEXCHAR *ap );
488 #else /* NEEDPROTOS */
489 static REGEXCHAR *pmatch();
490 #endif /* NEEDPROTOS */
491 
492 /*
493  * re_exec:
494  * 	execute nfa to find a match.
495  *
496  *	special cases: (nfa[0])
497  *		BOL
498  *			Match only once, starting from the
499  *			beginning.
500  *		CHR
501  *			First locate the character without
502  *			calling pmatch, and if found, call
503  *			pmatch for the remaining string.
504  *		END
505  *			re_comp failed, poor luser did not
506  *			check for it. Fail fast.
507  *
508  *	If a match is found, bopat[0] and eopat[0] are set
509  *	to the beginning and the end of the matched fragment,
510  *	respectively.
511  *
512  */
513 
514 int
515 LDAP_CALL
re_exec(char * lp)516 re_exec( char *lp )
517 {
518 	register REGEXCHAR c;
519 	register REGEXCHAR *ep = 0;
520 	register REGEXCHAR *ap = nfa;
521 
522 	bol = (REGEXCHAR*)lp;
523 
524 	bopat[0] = 0;
525 	bopat[1] = 0;
526 	bopat[2] = 0;
527 	bopat[3] = 0;
528 	bopat[4] = 0;
529 	bopat[5] = 0;
530 	bopat[6] = 0;
531 	bopat[7] = 0;
532 	bopat[8] = 0;
533 	bopat[9] = 0;
534 
535 	switch(*ap) {
536 
537 	case BOL:			/* anchored: match from BOL only */
538 		ep = pmatch((REGEXCHAR*)lp,ap);
539 		break;
540 	case CHR:			/* ordinary char: locate it fast */
541 		c = *(ap+1);
542 		while (*lp && *(REGEXCHAR*)lp != c)
543 			lp++;
544 		if (!*lp)		/* if EOS, fail, else fall thru. */
545 			return 0;
546 	default:			/* regular matching all the way. */
547 		do {
548 			if ((ep = pmatch((REGEXCHAR*)lp,ap)))
549 				break;
550 			lp++;
551 		} while (*lp);
552 
553 		break;
554 	case END:			/* munged automaton. fail always */
555 		return 0;
556 	}
557 	if (!ep)
558 		return 0;
559 
560 	bopat[0] = (REGEXCHAR*)lp;
561 	eopat[0] = ep;
562 	return 1;
563 }
564 
565 /*
566  * pmatch: internal routine for the hard part
567  *
568  * 	This code is partly snarfed from an early grep written by
569  *	David Conroy. The backref and tag stuff, and various other
570  *	innovations are by oz.
571  *
572  *	special case optimizations: (nfa[n], nfa[n+1])
573  *		CLO ANY
574  *			We KNOW .* will match everything upto the
575  *			end of line. Thus, directly go to the end of
576  *			line, without recursive pmatch calls. As in
577  *			the other closure cases, the remaining pattern
578  *			must be matched by moving backwards on the
579  *			string recursively, to find a match for xy
580  *			(x is ".*" and y is the remaining pattern)
581  *			where the match satisfies the LONGEST match for
582  *			x followed by a match for y.
583  *		CLO CHR
584  *			We can again scan the string forward for the
585  *			single char and at the point of failure, we
586  *			execute the remaining nfa recursively, same as
587  *			above.
588  *
589  *	At the end of a successful match, bopat[n] and eopat[n]
590  *	are set to the beginning and end of subpatterns matched
591  *	by tagged expressions (n = 1 to 9).
592  *
593  */
594 
/*
 * re_fail is not defined in this file. pmatch calls it as
 * re_fail(message, opcode) when it meets an opcode it cannot execute.
 * The declaration is skipped when re_fail already exists as a macro.
 */
#ifndef re_fail
extern void re_fail();
#endif /* re_fail */
598 
/*
 * Character classification table for the word boundary operators BOW
 * and EOW. The reason for not using the ctype macros is that we can
 * let the user add into our own table; see re_modw. This table
 * is not in the bitset form, since we may wish to extend it in the
 * future for other character classifications.
 *
 * The table is indexed by character code, ten entries per row:
 *
 *	TRUE for 0-9 A-Z a-z _
 */
static char chrtyp[MAXCHR] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
	0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 0, 0, 0, 0, 0
	};

/*
 * HIBIT	the eighth bit of a character.
 * inascii(x)	x reduced to its low seven bits.
 * iswordc(x)	the chrtyp entry for x. It expands to an array element,
 *		so re_modw also uses it on the left of an assignment.
 * isinset(x,y)	nonzero if character y has its bit set in bitset x;
 *		always 0 for a y that has HIBIT set. y is expanded more
 *		than once, so it must not have side effects.
 */
#define HIBIT		0200
#define inascii(x)	(0177&(x))
#define iswordc(x) 	chrtyp[inascii(x)]
#define isinset(x,y) 	(((y)&HIBIT)?0:((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND]))

/*
 * skip values for CLO XXX to skip past the closure: the number of
 * bytes taken by the closed element plus its terminating END, counted
 * from the byte after CLO.
 */

#define ANYSKIP	2 	/* [CLO] ANY END ...	     */
#define CHRSKIP	3	/* [CLO] CHR chr END ...     */
#define CCLSKIP 18	/* [CLO] CCL 16bytes END ... */
636 
637 static REGEXCHAR *
pmatch(REGEXCHAR * lp,REGEXCHAR * ap)638 pmatch( REGEXCHAR *lp, REGEXCHAR *ap)
639 {
640 	register int op, c, n;
641 	register REGEXCHAR *e;		/* extra pointer for CLO */
642 	register REGEXCHAR *bp;		/* beginning of subpat.. */
643 	register REGEXCHAR *ep;		/* ending of subpat..	 */
644 	REGEXCHAR *are;			/* to save the line ptr. */
645 
646 	while ((op = *ap++) != END)
647 		switch(op) {
648 
649 		case CHR:
650 			if (*lp++ != *ap++)
651 				return 0;
652 			break;
653 		case ANY:
654 			if (!*lp++)
655 				return 0;
656 			break;
657 		case CCL:
658 			c = *lp++;
659 			if (!isinset(ap,c))
660 				return 0;
661 			ap += BITBLK;
662 			break;
663 		case BOL:
664 			if (lp != bol)
665 				return 0;
666 			break;
667 		case EOL:
668 			if (*lp)
669 				return 0;
670 			break;
671 		case BOT:
672 			bopat[*ap++] = lp;
673 			break;
674 		case EOT:
675 			eopat[*ap++] = lp;
676 			break;
677  		case BOW:
678 			if ((lp!=bol && iswordc(lp[-1])) || !iswordc(*lp))
679 				return 0;
680 			break;
681 		case EOW:
682 			if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp))
683 				return 0;
684 			break;
685 		case REF:
686 			n = *ap++;
687 			bp = bopat[n];
688 			ep = eopat[n];
689 			while (bp < ep)
690 				if (*bp++ != *lp++)
691 					return 0;
692 			break;
693 		case CLO:
694 			are = lp;
695 			switch(*ap) {
696 
697 			case ANY:
698 				while (*lp)
699 					lp++;
700 				n = ANYSKIP;
701 				break;
702 			case CHR:
703 				c = *(ap+1);
704 				while (*lp && c == *lp)
705 					lp++;
706 				n = CHRSKIP;
707 				break;
708 			case CCL:
709 				while ((c = *lp) && isinset(ap+1,c))
710 					lp++;
711 				n = CCLSKIP;
712 				break;
713 			default:
714 				re_fail("closure: bad nfa.", *ap);
715 				return 0;
716 			}
717 
718 			ap += n;
719 
720 			while (lp >= are) {
721 				if ((e = pmatch(lp, ap)))
722 					return e;
723 				--lp;
724 			}
725 			return 0;
726 		default:
727 			re_fail("re_exec: bad nfa.", op);
728 			return 0;
729 		}
730 	return lp;
731 }
732 
733 /*
734  * re_modw:
735  *	add new characters into the word table to change re_exec's
736  *	understanding of what a word should look like. Note that we
737  *	only accept additions into the word definition.
738  *
739  *	If the string parameter is 0 or null string, the table is
740  *	reset back to the default containing A-Z a-z 0-9 _. [We use
741  *	the compact bitset representation for the default table]
742  */
743 
744 static REGEXCHAR deftab[16] = {
745 	0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207,
746 	0376, 0377, 0377, 007
747 };
748 
749 void
750 LDAP_CALL
re_modw(char * s)751 re_modw( char *s )
752 {
753 	register int i;
754 
755 	if (!s || !*s) {
756 		for (i = 0; i < MAXCHR; i++)
757 			if (!isinset(deftab,i))
758 				iswordc(i) = 0;
759 	}
760 	else
761 		while(*s)
762 			iswordc(*s++) = 1;
763 }
764 
765 /*
766  * re_subs:
767  *	substitute the matched portions of the src in dst.
768  *
769  *	&	substitute the entire matched pattern.
770  *
771  *	\digit	substitute a subpattern, with the given	tag number.
772  *		Tags are numbered from 1 to 9. If the particular
773  *		tagged subpattern does not exist, null is substituted.
774  */
775 int
776 LDAP_CALL
re_subs(char * src,char * dst)777 re_subs( char *src, char *dst)
778 {
779 	register char      c;
780 	register int       pin;
781 	register REGEXCHAR *bp;
782 	register REGEXCHAR *ep;
783 
784 	if (!*src || !bopat[0])
785 		return 0;
786 
787 	while ((c = *src++)) {
788 		switch(c) {
789 
790 		case '&':
791 			pin = 0;
792 			break;
793 
794 		case '\\':
795 			c = *src++;
796 			if (c >= '0' && c <= '9') {
797 				pin = c - '0';
798 				break;
799 			}
800 
801 		default:
802 			*dst++ = c;
803 			continue;
804 		}
805 
806 		if ((bp = bopat[pin]) && (ep = eopat[pin])) {
807 			while (*bp && bp < ep)
808 				*dst++ = *(char*)bp++;
809 			if (bp < ep)
810 				return 0;
811 		}
812 	}
813 	*dst = (char) 0;
814 	return 1;
815 }
816 
817 #ifdef DEBUG
818 
/*
 * 16-bit Windows has no printf and no exit. The replacement printf
 * formats its arguments into a local buffer and hands the text to
 * OutputDebugString; it always returns 0. exit(v) becomes a plain
 * return from the calling function.
 *
 * NOTE(review): vsprintf is not bounded, so formatted output longer
 * than the 1024-byte buffer would overrun it.
 */
#if defined( _WINDOWS ) && !defined( _WIN32 )
static int LDAP_C printf( const char* pszFormat, ...)
{
	char	message[1024];
	va_list	args;

	va_start(args, pszFormat);
	vsprintf(message, pszFormat, args);
	va_end(args);

	OutputDebugString(message);
	return 0;
}
#define exit(v) return
#endif /* 16-bit Windows */
833 
834 
835 #ifdef REGEX_DEBUG
836 
837 static void nfadump( REGEXCHAR *ap);
838 
839 /*
840  * symbolic - produce a symbolic dump of the nfa
841  */
842 void
symbolic(char * s)843 symbolic( char *s )
844 {
845 	printf("pattern: %s\n", s);
846 	printf("nfacode:\n");
847 	nfadump(nfa);
848 }
849 
850 static void
nfadump(REGEXCHAR * ap)851 nfadump( REGEXCHAR *ap)
852 {
853 	register int n;
854 
855 	while (*ap != END)
856 		switch(*ap++) {
857 		case CLO:
858 			printf("CLOSURE");
859 			nfadump(ap);
860 			switch(*ap) {
861 			case CHR:
862 				n = CHRSKIP;
863 				break;
864 			case ANY:
865 				n = ANYSKIP;
866 				break;
867 			case CCL:
868 				n = CCLSKIP;
869 				break;
870 			}
871 			ap += n;
872 			break;
873 		case CHR:
874 			printf("\tCHR %c\n",*ap++);
875 			break;
876 		case ANY:
877 			printf("\tANY .\n");
878 			break;
879 		case BOL:
880 			printf("\tBOL -\n");
881 			break;
882 		case EOL:
883 			printf("\tEOL -\n");
884 			break;
885 		case BOT:
886 			printf("BOT: %d\n",*ap++);
887 			break;
888 		case EOT:
889 			printf("EOT: %d\n",*ap++);
890 			break;
891 		case BOW:
892 			printf("BOW\n");
893 			break;
894 		case EOW:
895 			printf("EOW\n");
896 			break;
897 		case REF:
898 			printf("REF: %d\n",*ap++);
899 			break;
900 		case CCL:
901 			printf("\tCCL [");
902 			for (n = 0; n < MAXCHR; n++)
903 				if (isinset(ap,(REGEXCHAR)n)) {
904 					if (n < ' ')
905 						printf("^%c", n ^ 0x040);
906 					else
907 						printf("%c", n);
908 				}
909 			printf("]\n");
910 			ap += BITBLK;
911 			break;
912 		default:
913 			printf("bad nfa. opcode %o\n", ap[-1]);
914 			exit(1);
915 			break;
916 		}
917 }
918 #endif /* REGEX_DEBUG */
919 #endif /* DEBUG */
920 #endif /* macintosh or DOS or _WINDOWS or NEED_BSDREGEX */
921