1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Original Code is Mozilla Communicator client code, released
15 * March 31, 1998.
16 *
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998-1999
20 * the Initial Developer. All Rights Reserved.
21 *
22 * Contributor(s):
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37 #include "ldap-int.h"
38 #if defined( macintosh ) || defined( DOS ) || defined( _WINDOWS ) || defined( NEED_BSDREGEX ) || defined( XP_OS2)
39 #include "regex.h"
40
41 /*
42 * regex - Regular expression pattern matching and replacement
43 *
44 * By: Ozan S. Yigit (oz)
45 * Dept. of Computer Science
46 * York University
47 *
48 * These routines are the PUBLIC DOMAIN equivalents of regex
49 * routines as found in 4.nBSD UN*X, with minor extensions.
50 *
51 * These routines are derived from various implementations found
52 * in software tools books, and Conroy's grep. They are NOT derived
53 * from licensed/restricted software.
54 * For more interesting/academic/complicated implementations,
55 * see Henry Spencer's regexp routines, or GNU Emacs pattern
56 * matching module.
57 *
58 * Use the actual CCL code in the CLO
59 * section of pmatch. No need for a recursive
60 * pmatch call.
61 *
62 * Use a bitmap table to set char bits in an
63 * 8-bit chunk.
64 *
65 * Interfaces:
66 * re_comp: compile a regular expression into a NFA.
67 *
68 * char *re_comp(s)
69 * char *s;
70 *
71 * re_exec: execute the NFA to match a pattern.
72 *
73 * int re_exec(s)
74 * char *s;
75 *
76 * re_modw change re_exec's understanding of what a "word"
77 * looks like (for \< and \>) by adding into the
78 * hidden word-syntax table.
79 *
80 * void re_modw(s)
81 * char *s;
82 *
83 * re_subs: substitute the matched portions in a new string.
84 *
85 * int re_subs(src, dst)
86 * char *src;
87 * char *dst;
88 *
89 * re_fail: failure routine for re_exec.
90 *
91 * void re_fail(msg, op)
92 * char *msg;
93 * char op;
94 *
95 * Regular Expressions:
96 *
97 * [1] char matches itself, unless it is a special
98 * character (metachar): . \ [ ] * + ^ $
99 *
100 * [2] . matches any character.
101 *
102 * [3] \ matches the character following it, except
103 * when followed by a left or right round bracket,
104 * a digit 1 to 9 or a left or right angle bracket.
105 * (see [7], [8] and [9])
106 * It is used as an escape character for all
107 * other meta-characters, and itself. When used
108 * in a set ([4]), it is treated as an ordinary
109 * character.
110 *
111 * [4] [set] matches one of the characters in the set.
112 * If the first character in the set is "^",
113 * it matches a character NOT in the set, i.e.
114 * complements the set. A shorthand S-E is
115 * used to specify a set of characters S up to
116 * E, inclusive. The special characters "]" and
117 * "-" have no special meaning if they appear
118 * as the first chars in the set.
119 * examples: match:
120 *
121 * [a-z] any lowercase alpha
122 *
123 * [^]-] any char except ] and -
124 *
125 * [^A-Z] any char except uppercase
126 * alpha
127 *
128 * [a-zA-Z] any alpha
129 *
130 * [5] * any regular expression form [1] to [4], followed by
131 * closure char (*) matches zero or more matches of
132 * that form.
133 *
134 * [6] + same as [5], except it matches one or more.
135 *
136 * [7] a regular expression in the form [1] to [10], enclosed
137 * as \(form\) matches what form matches. The enclosure
138 * creates a set of tags, used for [8] and for
139 * pattern substitution. The tagged forms are numbered
140 * starting from 1.
141 *
142 * [8] a \ followed by a digit 1 to 9 matches whatever a
143 * previously tagged regular expression ([7]) matched.
144 *
145 * [9] \< a regular expression starting with a \< construct
146 * \> and/or ending with a \> construct, restricts the
147 * pattern matching to the beginning of a word, and/or
148 * the end of a word. A word is defined to be a character
149 * string beginning and/or ending with the characters
150 * A-Z a-z 0-9 and _. It must also be preceded and/or
151 * followed by any character outside those mentioned.
152 *
153 * [10] a composite regular expression xy where x and y
154 * are in the form [1] to [10] matches the longest
155 * match of x followed by a match for y.
156 *
157 * [11] ^ a regular expression starting with a ^ character
158 * $ and/or ending with a $ character, restricts the
159 * pattern matching to the beginning of the line,
160 * or the end of line. [anchors] Elsewhere in the
161 * pattern, ^ and $ are treated as ordinary characters.
162 *
163 *
164 * Acknowledgements:
165 *
166 * HCR's Hugh Redelmeier has been most helpful in various
167 * stages of development. He convinced me to include BOW
168 * and EOW constructs, originally invented by Rob Pike at
169 * the University of Toronto.
170 *
171 * References:
172 * Software tools Kernighan & Plauger
173 * Software tools in Pascal Kernighan & Plauger
174 * Grep [rsx-11 C dist] David Conroy
175 * ed - text editor Un*x Programmer's Manual
176 * Advanced editing on Un*x B. W. Kernighan
177 * RegExp routines Henry Spencer
178 *
179 * Notes:
180 *
181 * This implementation uses a bit-set representation for character
182 * classes for speed and compactness. Each character is represented
183 * by one bit in a 128-bit block. Thus, CCL always takes a
184 * constant 16 bytes in the internal nfa, and re_exec does a single
185 * bit comparison to locate the character in the set.
186 *
187 * Examples:
188 *
189 * pattern: foo*.*
190 * compile: CHR f CHR o CLO CHR o END CLO ANY END END
191 * matches: fo foo fooo foobar fobar foxx ...
192 *
193 * pattern: fo[ob]a[rz]
194 * compile: CHR f CHR o CCL bitset CHR a CCL bitset END
195 * matches: fobar fooar fobaz fooaz
196 *
197 * pattern: foo\\+
198 * compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END
199 * matches: foo\ foo\\ foo\\\ ...
200 *
201 * pattern: \(foo\)[1-3]\1 (same as foo[1-3]foo)
202 * compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
203 * matches: foo1foo foo2foo foo3foo
204 *
205 * pattern: \(fo.*\)-\1
206 * compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
207 * matches: foo-foo fo-fo fob-fob foobar-foobar ...
208 */
209
#define MAXNFA  1024            /* size of the compiled automaton buffer */
#define MAXTAG  10              /* size of the tag stack and tag tables  */

#define OKP     1               /* sta: last pattern compiled cleanly    */
#define NOP     0               /* sta: no usable compiled pattern       */

/* opcodes stored in the compiled automaton */
#define CHR     1
#define ANY     2
#define CCL     3
#define BOL     4
#define EOL     5
#define BOT     6
#define EOT     7
#define BOW     8
#define EOW     9
#define REF     10
#define CLO     11

#define END     0

/*
 * The following defines are not meant to be changeable.
 * They are for readability only.
 */
#define MAXCHR  128
#define CHRBIT  8
/* parenthesized so that BITBLK stays one operand inside larger expressions */
#define BITBLK  (MAXCHR/CHRBIT)
#define BLKIND  0170
#define BITIND  07

#define ASCIIB  0177

/* Plain char, on the other hand, may be signed or unsigned; it depends on
 * the platform and perhaps a compiler option.  A hard fact of life, in C.
 *
 * 6-April-1999 mcs@netscape.com: replaced CHAR with REGEXCHAR to avoid
 *              conflicts with system types on Win32.  Changed typedef
 *              for REGEXCHAR to always be unsigned, which seems right.
 */
typedef unsigned char REGEXCHAR;

static int  tagstk[MAXTAG];             /* subpat tag stack..*/
static REGEXCHAR nfa[MAXNFA];           /* automaton..       */
static int  sta = NOP;                  /* status of lastpat */

static REGEXCHAR bittab[BITBLK];        /* bit table for CCL */
                                        /* pre-set bits...   */
static REGEXCHAR bitarr[] = {1,2,4,8,16,32,64,128};
258
259 static void
chset(REGEXCHAR c)260 chset(REGEXCHAR c)
261 {
262 bittab[((c) & (unsigned)BLKIND) >> 3] |= bitarr[(c) & BITIND];
263 }
264
265 #define badpat(x) (*nfa = END, x)
266 #define store(x) *mp++ = x
267
268 char *
269 LDAP_CALL
re_comp(char * pat)270 re_comp( char *pat )
271 {
272 register REGEXCHAR *p; /* pattern pointer */
273 register REGEXCHAR *mp=nfa; /* nfa pointer */
274 register REGEXCHAR *lp; /* saved pointer.. */
275 register REGEXCHAR *sp=nfa; /* another one.. */
276
277 register int tagi = 0; /* tag stack index */
278 register int tagc = 1; /* actual tag count */
279
280 register int n;
281 register REGEXCHAR mask; /* xor mask -CCL/NCL */
282 int c1, c2;
283
284 if (!pat || !*pat) {
285 if (sta) {
286 return 0;
287 } else {
288 return badpat("No previous regular expression");
289 }
290 }
291 sta = NOP;
292
293 for (p = (REGEXCHAR*)pat; *p; p++) {
294 lp = mp;
295 switch(*p) {
296
297 case '.': /* match any char.. */
298 store(ANY);
299 break;
300
301 case '^': /* match beginning.. */
302 if (p == (REGEXCHAR*)pat)
303 store(BOL);
304 else {
305 store(CHR);
306 store(*p);
307 }
308 break;
309
310 case '$': /* match endofline.. */
311 if (!*(p+1))
312 store(EOL);
313 else {
314 store(CHR);
315 store(*p);
316 }
317 break;
318
319 case '[': /* match char class..*/
320 store(CCL);
321
322 if (*++p == '^') {
323 mask = 0377;
324 p++;
325 }
326 else
327 mask = 0;
328
329 if (*p == '-') /* real dash */
330 chset(*p++);
331 if (*p == ']') /* real brac */
332 chset(*p++);
333 while (*p && *p != ']') {
334 if (*p == '-' && *(p+1) && *(p+1) != ']') {
335 p++;
336 c1 = *(p-2) + 1;
337 c2 = *p++;
338 while (c1 <= c2)
339 chset((REGEXCHAR)c1++);
340 }
341 #ifdef EXTEND
342 else if (*p == '\\' && *(p+1)) {
343 p++;
344 chset(*p++);
345 }
346 #endif
347 else
348 chset(*p++);
349 }
350 if (!*p)
351 return badpat("Missing ]");
352
353 for (n = 0; n < BITBLK; bittab[n++] = (REGEXCHAR) 0)
354 store(mask ^ bittab[n]);
355
356 break;
357
358 case '*': /* match 0 or more.. */
359 case '+': /* match 1 or more.. */
360 if (p == (REGEXCHAR*)pat)
361 return badpat("Empty closure");
362 lp = sp; /* previous opcode */
363 if (*lp == CLO) /* equivalence.. */
364 break;
365 switch(*lp) {
366
367 case BOL:
368 case BOT:
369 case EOT:
370 case BOW:
371 case EOW:
372 case REF:
373 return badpat("Illegal closure");
374 default:
375 break;
376 }
377
378 if (*p == '+')
379 for (sp = mp; lp < sp; lp++)
380 store(*lp);
381
382 store(END);
383 store(END);
384 sp = mp;
385 while (--mp > lp)
386 *mp = mp[-1];
387 store(CLO);
388 mp = sp;
389 break;
390
391 case '\\': /* tags, backrefs .. */
392 switch(*++p) {
393
394 case '(':
395 if (tagc < MAXTAG) {
396 tagstk[++tagi] = tagc;
397 store(BOT);
398 store(tagc++);
399 }
400 else
401 return badpat("Too many \\(\\) pairs");
402 break;
403 case ')':
404 if (*sp == BOT)
405 return badpat("Null pattern inside \\(\\)");
406 if (tagi > 0) {
407 store(EOT);
408 store(tagstk[tagi--]);
409 }
410 else
411 return badpat("Unmatched \\)");
412 break;
413 case '<':
414 store(BOW);
415 break;
416 case '>':
417 if (*sp == BOW)
418 return badpat("Null pattern inside \\<\\>");
419 store(EOW);
420 break;
421 case '1':
422 case '2':
423 case '3':
424 case '4':
425 case '5':
426 case '6':
427 case '7':
428 case '8':
429 case '9':
430 n = *p-'0';
431 if (tagi > 0 && tagstk[tagi] == n)
432 return badpat("Cyclical reference");
433 if (tagc > n) {
434 store(REF);
435 store(n);
436 }
437 else
438 return badpat("Undetermined reference");
439 break;
440 #ifdef EXTEND
441 case 'b':
442 store(CHR);
443 store('\b');
444 break;
445 case 'n':
446 store(CHR);
447 store('\n');
448 break;
449 case 'f':
450 store(CHR);
451 store('\f');
452 break;
453 case 'r':
454 store(CHR);
455 store('\r');
456 break;
457 case 't':
458 store(CHR);
459 store('\t');
460 break;
461 #endif
462 default:
463 store(CHR);
464 store(*p);
465 }
466 break;
467
468 default : /* an ordinary char */
469 store(CHR);
470 store(*p);
471 break;
472 }
473 sp = lp;
474 }
475 if (tagi > 0)
476 return badpat("Unmatched \\(");
477 store(END);
478 sta = OKP;
479 return 0;
480 }
481
482
483 static REGEXCHAR *bol;
484 static REGEXCHAR *bopat[MAXTAG];
485 static REGEXCHAR *eopat[MAXTAG];
486 #ifdef NEEDPROTOS
487 static REGEXCHAR *pmatch( REGEXCHAR *lp, REGEXCHAR *ap );
488 #else /* NEEDPROTOS */
489 static REGEXCHAR *pmatch();
490 #endif /* NEEDPROTOS */
491
492 /*
493 * re_exec:
494 * execute nfa to find a match.
495 *
496 * special cases: (nfa[0])
497 * BOL
498 * Match only once, starting from the
499 * beginning.
500 * CHR
501 * First locate the character without
502 * calling pmatch, and if found, call
503 * pmatch for the remaining string.
504 * END
505 * re_comp failed, poor luser did not
506 * check for it. Fail fast.
507 *
508 * If a match is found, bopat[0] and eopat[0] are set
509 * to the beginning and the end of the matched fragment,
510 * respectively.
511 *
512 */
513
514 int
515 LDAP_CALL
re_exec(char * lp)516 re_exec( char *lp )
517 {
518 register REGEXCHAR c;
519 register REGEXCHAR *ep = 0;
520 register REGEXCHAR *ap = nfa;
521
522 bol = (REGEXCHAR*)lp;
523
524 bopat[0] = 0;
525 bopat[1] = 0;
526 bopat[2] = 0;
527 bopat[3] = 0;
528 bopat[4] = 0;
529 bopat[5] = 0;
530 bopat[6] = 0;
531 bopat[7] = 0;
532 bopat[8] = 0;
533 bopat[9] = 0;
534
535 switch(*ap) {
536
537 case BOL: /* anchored: match from BOL only */
538 ep = pmatch((REGEXCHAR*)lp,ap);
539 break;
540 case CHR: /* ordinary char: locate it fast */
541 c = *(ap+1);
542 while (*lp && *(REGEXCHAR*)lp != c)
543 lp++;
544 if (!*lp) /* if EOS, fail, else fall thru. */
545 return 0;
546 default: /* regular matching all the way. */
547 do {
548 if ((ep = pmatch((REGEXCHAR*)lp,ap)))
549 break;
550 lp++;
551 } while (*lp);
552
553 break;
554 case END: /* munged automaton. fail always */
555 return 0;
556 }
557 if (!ep)
558 return 0;
559
560 bopat[0] = (REGEXCHAR*)lp;
561 eopat[0] = ep;
562 return 1;
563 }
564
565 /*
566 * pmatch: internal routine for the hard part
567 *
568 * This code is partly snarfed from an early grep written by
569 * David Conroy. The backref and tag stuff, and various other
570 * innovations are by oz.
571 *
572 * special case optimizations: (nfa[n], nfa[n+1])
573 * CLO ANY
574 * We KNOW .* will match everything upto the
575 * end of line. Thus, directly go to the end of
576 * line, without recursive pmatch calls. As in
577 * the other closure cases, the remaining pattern
578 * must be matched by moving backwards on the
579 * string recursively, to find a match for xy
580 * (x is ".*" and y is the remaining pattern)
581 * where the match satisfies the LONGEST match for
582 * x followed by a match for y.
583 * CLO CHR
584 * We can again scan the string forward for the
585 * single char and at the point of failure, we
586 * execute the remaining nfa recursively, same as
587 * above.
588 *
589 * At the end of a successful match, bopat[n] and eopat[n]
590 * are set to the beginning and end of subpatterns matched
591 * by tagged expressions (n = 1 to 9).
592 *
593 */
594
/*
 * re_fail is called by pmatch with a message and the offending opcode
 * when the automaton is malformed.  It is defined outside this file (or
 * supplied as a macro, in which case no declaration is made here).  The
 * declaration is left without a parameter list because the definition,
 * and therefore its exact parameter types, is not visible from here.
 */
#ifndef re_fail
extern void re_fail();
#endif /* re_fail */
598
599 /*
600 * character classification table for word boundary operators BOW
601 * and EOW. the reason for not using ctype macros is that we can
602 * let the user add into our own table. see re_modw. This table
603 * is not in the bitset form, since we may wish to extend it in the
604 * future for other character classifications.
605 *
606 * TRUE for 0-9 A-Z a-z _
607 */
608 static char chrtyp[MAXCHR] = {
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
611 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
612 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
613 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
614 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
615 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
616 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
617 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
618 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
619 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
620 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
621 1, 1, 1, 0, 0, 0, 0, 0
622 };
623
624 #define HIBIT 0200
625 #define inascii(x) (0177&(x))
626 #define iswordc(x) chrtyp[inascii(x)]
627 #define isinset(x,y) (((y)&HIBIT)?0:((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND]))
628
629 /*
630 * skip values for CLO XXX to skip past the closure
631 */
632
633 #define ANYSKIP 2 /* [CLO] ANY END ... */
634 #define CHRSKIP 3 /* [CLO] CHR chr END ... */
635 #define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */
636
637 static REGEXCHAR *
pmatch(REGEXCHAR * lp,REGEXCHAR * ap)638 pmatch( REGEXCHAR *lp, REGEXCHAR *ap)
639 {
640 register int op, c, n;
641 register REGEXCHAR *e; /* extra pointer for CLO */
642 register REGEXCHAR *bp; /* beginning of subpat.. */
643 register REGEXCHAR *ep; /* ending of subpat.. */
644 REGEXCHAR *are; /* to save the line ptr. */
645
646 while ((op = *ap++) != END)
647 switch(op) {
648
649 case CHR:
650 if (*lp++ != *ap++)
651 return 0;
652 break;
653 case ANY:
654 if (!*lp++)
655 return 0;
656 break;
657 case CCL:
658 c = *lp++;
659 if (!isinset(ap,c))
660 return 0;
661 ap += BITBLK;
662 break;
663 case BOL:
664 if (lp != bol)
665 return 0;
666 break;
667 case EOL:
668 if (*lp)
669 return 0;
670 break;
671 case BOT:
672 bopat[*ap++] = lp;
673 break;
674 case EOT:
675 eopat[*ap++] = lp;
676 break;
677 case BOW:
678 if ((lp!=bol && iswordc(lp[-1])) || !iswordc(*lp))
679 return 0;
680 break;
681 case EOW:
682 if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp))
683 return 0;
684 break;
685 case REF:
686 n = *ap++;
687 bp = bopat[n];
688 ep = eopat[n];
689 while (bp < ep)
690 if (*bp++ != *lp++)
691 return 0;
692 break;
693 case CLO:
694 are = lp;
695 switch(*ap) {
696
697 case ANY:
698 while (*lp)
699 lp++;
700 n = ANYSKIP;
701 break;
702 case CHR:
703 c = *(ap+1);
704 while (*lp && c == *lp)
705 lp++;
706 n = CHRSKIP;
707 break;
708 case CCL:
709 while ((c = *lp) && isinset(ap+1,c))
710 lp++;
711 n = CCLSKIP;
712 break;
713 default:
714 re_fail("closure: bad nfa.", *ap);
715 return 0;
716 }
717
718 ap += n;
719
720 while (lp >= are) {
721 if ((e = pmatch(lp, ap)))
722 return e;
723 --lp;
724 }
725 return 0;
726 default:
727 re_fail("re_exec: bad nfa.", op);
728 return 0;
729 }
730 return lp;
731 }
732
733 /*
734 * re_modw:
735 * add new characters into the word table to change re_exec's
736 * understanding of what a word should look like. Note that we
737 * only accept additions into the word definition.
738 *
739 * If the string parameter is 0 or null string, the table is
740 * reset back to the default containing A-Z a-z 0-9 _. [We use
741 * the compact bitset representation for the default table]
742 */
743
744 static REGEXCHAR deftab[16] = {
745 0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207,
746 0376, 0377, 0377, 007
747 };
748
749 void
750 LDAP_CALL
re_modw(char * s)751 re_modw( char *s )
752 {
753 register int i;
754
755 if (!s || !*s) {
756 for (i = 0; i < MAXCHR; i++)
757 if (!isinset(deftab,i))
758 iswordc(i) = 0;
759 }
760 else
761 while(*s)
762 iswordc(*s++) = 1;
763 }
764
765 /*
766 * re_subs:
767 * substitute the matched portions of the src in dst.
768 *
769 * & substitute the entire matched pattern.
770 *
771 * \digit substitute a subpattern, with the given tag number.
772 * Tags are numbered from 1 to 9. If the particular
773 * tagged subpattern does not exist, null is substituted.
774 */
775 int
776 LDAP_CALL
re_subs(char * src,char * dst)777 re_subs( char *src, char *dst)
778 {
779 register char c;
780 register int pin;
781 register REGEXCHAR *bp;
782 register REGEXCHAR *ep;
783
784 if (!*src || !bopat[0])
785 return 0;
786
787 while ((c = *src++)) {
788 switch(c) {
789
790 case '&':
791 pin = 0;
792 break;
793
794 case '\\':
795 c = *src++;
796 if (c >= '0' && c <= '9') {
797 pin = c - '0';
798 break;
799 }
800
801 default:
802 *dst++ = c;
803 continue;
804 }
805
806 if ((bp = bopat[pin]) && (ep = eopat[pin])) {
807 while (*bp && bp < ep)
808 *dst++ = *(char*)bp++;
809 if (bp < ep)
810 return 0;
811 }
812 }
813 *dst = (char) 0;
814 return 1;
815 }
816
817 #ifdef DEBUG
818
/*
 * No printf or exit in 16-bit Windows: format into a local buffer and
 * hand the text to OutputDebugString, and turn exit() into a plain
 * return from the calling function.
 *
 * NOTE(review): vsprintf puts no limit on what it writes into buf, so
 * output longer than the buffer overruns it -- confirm whether a
 * bounded formatter is available on this target before replacing it.
 */
#if defined( _WINDOWS ) && !defined( _WIN32 )
static int LDAP_C printf( const char* pszFormat, ...)
{
    char buf[1024];
    va_list arglist;
    va_start(arglist, pszFormat);
    vsprintf(buf, pszFormat, arglist);
    va_end(arglist);
    OutputDebugString(buf);
    return 0;
}
#define exit(v) return
#endif /* 16-bit Windows */
833
834
835 #ifdef REGEX_DEBUG
836
837 static void nfadump( REGEXCHAR *ap);
838
839 /*
840 * symbolic - produce a symbolic dump of the nfa
841 */
842 void
symbolic(char * s)843 symbolic( char *s )
844 {
845 printf("pattern: %s\n", s);
846 printf("nfacode:\n");
847 nfadump(nfa);
848 }
849
850 static void
nfadump(REGEXCHAR * ap)851 nfadump( REGEXCHAR *ap)
852 {
853 register int n;
854
855 while (*ap != END)
856 switch(*ap++) {
857 case CLO:
858 printf("CLOSURE");
859 nfadump(ap);
860 switch(*ap) {
861 case CHR:
862 n = CHRSKIP;
863 break;
864 case ANY:
865 n = ANYSKIP;
866 break;
867 case CCL:
868 n = CCLSKIP;
869 break;
870 }
871 ap += n;
872 break;
873 case CHR:
874 printf("\tCHR %c\n",*ap++);
875 break;
876 case ANY:
877 printf("\tANY .\n");
878 break;
879 case BOL:
880 printf("\tBOL -\n");
881 break;
882 case EOL:
883 printf("\tEOL -\n");
884 break;
885 case BOT:
886 printf("BOT: %d\n",*ap++);
887 break;
888 case EOT:
889 printf("EOT: %d\n",*ap++);
890 break;
891 case BOW:
892 printf("BOW\n");
893 break;
894 case EOW:
895 printf("EOW\n");
896 break;
897 case REF:
898 printf("REF: %d\n",*ap++);
899 break;
900 case CCL:
901 printf("\tCCL [");
902 for (n = 0; n < MAXCHR; n++)
903 if (isinset(ap,(REGEXCHAR)n)) {
904 if (n < ' ')
905 printf("^%c", n ^ 0x040);
906 else
907 printf("%c", n);
908 }
909 printf("]\n");
910 ap += BITBLK;
911 break;
912 default:
913 printf("bad nfa. opcode %o\n", ap[-1]);
914 exit(1);
915 break;
916 }
917 }
918 #endif /* REGEX_DEBUG */
919 #endif /* DEBUG */
920 #endif /* macintosh or DOS or _WINDOWS or NEED_BSDREGEX */
921