1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * The contents of this file are subject to the Mozilla Public License Version
5  * 1.1 (the "License"); you may not use this file except in compliance with
6  * the License. You may obtain a copy of the License at
7  * http://www.mozilla.org/MPL/
8  *
9  * Software distributed under the License is distributed on an "AS IS" basis,
10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11  * for the specific language governing rights and limitations under the
12  * License.
13  *
14  * The Original Code is Mozilla Communicator client code, released
15  * March 31, 1998.
16  *
17  * The Initial Developer of the Original Code is
18  * Netscape Communications Corporation.
19  * Portions created by the Initial Developer are Copyright (C) 1998-1999
20  * the Initial Developer. All Rights Reserved.
21  *
22  * Contributor(s):
23  *
24  * Alternatively, the contents of this file may be used under the terms of
25  * either of the GNU General Public License Version 2 or later (the "GPL"),
26  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27  * in which case the provisions of the GPL or the LGPL are applicable instead
28  * of those above. If you wish to allow use of your version of this file only
29  * under the terms of either the GPL or the LGPL, and not to allow others to
30  * use your version of this file under the terms of the MPL, indicate your
31  * decision by deleting the provisions above and replace them with the notice
32  * and other provisions required by the GPL or the LGPL. If you do not delete
33  * the provisions above, a recipient may use your version of this file under
34  * the terms of any one of the MPL, the GPL or the LGPL.
35  *
36  * ***** END LICENSE BLOCK ***** */
37 #include "ldap-int.h"
38 #if defined(macintosh) || defined(DOS) || defined(_WINDOWS) || \
39     defined(NEED_BSDREGEX) || defined(XP_OS2)
40 #  include "regex.h"
41 
42 /*
43  * regex - Regular expression pattern matching  and replacement
44  *
45  * By:  Ozan S. Yigit (oz)
46  *      Dept. of Computer Science
47  *      York University
48  *
49  * These routines are the PUBLIC DOMAIN equivalents of regex
50  * routines as found in 4.nBSD UN*X, with minor extensions.
51  *
52  * These routines are derived from various implementations found
53  * in software tools books, and Conroy's grep. They are NOT derived
54  * from licensed/restricted software.
55  * For more interesting/academic/complicated implementations,
56  * see Henry Spencer's regexp routines, or GNU Emacs pattern
57  * matching module.
58  *
59  * Use the actual CCL code in the CLO
60  * section of pmatch. No need for a recursive
61  * pmatch call.
62  *
63  * Use a bitmap table to set char bits in an
64  * 8-bit chunk.
65  *
66  * Interfaces:
67  *      re_comp:        compile a regular expression into a NFA.
68  *
69  *      char *re_comp(s)
70  *      char *s;
71  *
72  *      re_exec:        execute the NFA to match a pattern.
73  *
74  *      int re_exec(s)
75  *      char *s;
76  *
 *      re_modw:        change re_exec's understanding of what a "word"
 *                      looks like (for \< and \>) by adding into the
 *                      hidden word-syntax table.
80  *
81  *      void re_modw(s)
82  *      char *s;
83  *
84  *      re_subs:  substitute the matched portions in a new string.
85  *
86  *      int re_subs(src, dst)
87  *      char *src;
88  *      char *dst;
89  *
90  *      re_fail:  failure routine for re_exec.
91  *
92  *      void re_fail(msg, op)
93  *      char *msg;
94  *      char op;
95  *
96  * Regular Expressions:
97  *
98  *      [1]     char    matches itself, unless it is a special
99  *                      character (metachar): . \ [ ] * + ^ $
100  *
101  *      [2]     .       matches any character.
102  *
103  *      [3]     \       matches the character following it, except
104  *      when followed by a left or right round bracket,
105  *      a digit 1 to 9 or a left or right angle bracket.
106  *      (see [7], [8] and [9])
107  *      It is used as an escape character for all
108  *      other meta-characters, and itself. When used
109  *      in a set ([4]), it is treated as an ordinary
110  *      character.
111  *
112  *      [4]     [set]   matches one of the characters in the set.
113  *                      If the first character in the set is "^",
114  *                      it matches a character NOT in the set, i.e.
115  *      complements the set. A shorthand S-E is
116  *      used to specify a set of characters S up to
117  *      E, inclusive. The special characters "]" and
118  *      "-" have no special meaning if they appear
119  *      as the first chars in the set.
120  *                      examples:        match:
121  *
122  *                              [a-z]    any lowercase alpha
123  *
124  *                              [^]-]    any char except ] and -
125  *
126  *                              [^A-Z]   any char except uppercase
127  *                                       alpha
128  *
129  *                              [a-zA-Z] any alpha
130  *
131  *      [5]     *       any regular expression form [1] to [4], followed by
132  *                      closure char (*) matches zero or more matches of
133  *                      that form.
134  *
135  *      [6]     +       same as [5], except it matches one or more.
136  *
137  *      [7]             a regular expression in the form [1] to [10], enclosed
138  *                      as \(form\) matches what form matches. The enclosure
139  *                      creates a set of tags, used for [8] and for
 *                      pattern substitution. The tagged forms are numbered
141  *      starting from 1.
142  *
143  *      [8]             a \ followed by a digit 1 to 9 matches whatever a
144  *                      previously tagged regular expression ([7]) matched.
145  *
146  *      [9]             \<  a regular expression starting with a \< construct
147  *                      \>  and/or ending with a \> construct, restricts the
148  *                      pattern matching to the beginning of a word, and/or
149  *                      the end of a word. A word is defined to be a character
150  *                      string beginning and/or ending with the characters
151  *                      A-Z a-z 0-9 and _. It must also be preceded and/or
152  *                      followed by any character outside those mentioned.
153  *
154  *      [10]            a composite regular expression xy where x and y
155  *                      are in the form [1] to [10] matches the longest
156  *                      match of x followed by a match for y.
157  *
158  *      [11]            ^  a regular expression starting with a ^ character
159  *                      $  and/or ending with a $ character, restricts the
160  *                      pattern matching to the beginning of the line,
161  *                      or the end of line. [anchors] Elsewhere in the
162  *                      pattern, ^ and $ are treated as ordinary characters.
163  *
164  *
165  * Acknowledgements:
166  *
167  * HCR's Hugh Redelmeier has been most helpful in various
168  * stages of development. He convinced me to include BOW
169  * and EOW constructs, originally invented by Rob Pike at
170  * the University of Toronto.
171  *
172  * References:
173  *              Software tools     Kernighan & Plauger
174  *              Software tools in Pascal        Kernighan & Plauger
175  *              Grep [rsx-11 C dist]            David Conroy
176  *              ed - text editor    Un*x Programmer's Manual
177  *              Advanced editing on Un*x  B. W. Kernighan
178  *              RegExp routines      Henry Spencer
179  *
180  * Notes:
181  *
182  * This implementation uses a bit-set representation for character
183  * classes for speed and compactness. Each character is represented
184  * by one bit in a 128-bit block. Thus, CCL always takes a
185  * constant 16 bytes in the internal nfa, and re_exec does a single
186  * bit comparison to locate the character in the set.
187  *
188  * Examples:
189  *
190  * pattern:  foo*.*
191  * compile:  CHR f CHR o CLO CHR o END CLO ANY END END
192  * matches:  fo foo fooo foobar fobar foxx ...
193  *
194  * pattern:  fo[ob]a[rz]
195  * compile:  CHR f CHR o CCL bitset CHR a CCL bitset END
196  * matches:  fobar fooar fobaz fooaz
197  *
198  * pattern:  foo\\+
199  * compile:  CHR f CHR o CHR o CHR \ CLO CHR \ END END
200  * matches:  foo\ foo\\ foo\\\  ...
201  *
202  * pattern:  \(foo\)[1-3]\1  (same as foo[1-3]foo)
203  * compile:  BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
204  * matches:  foo1foo foo2foo foo3foo
205  *
206  * pattern:  \(fo.*\)-\1
207  * compile:  BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
208  * matches:  foo-foo fo-fo fob-fob foobar-foobar ...
209  */
210 
/* Capacity limits. */
#  define MAXNFA 1024 /* size in bytes of the compiled automaton buffer */
#  define MAXTAG 10   /* tag slots: 0 is the whole match, 1-9 are groups */

/* Values of the "last pattern compiled" status flag. */
#  define OKP 1
#  define NOP 0

/* Opcodes of the compiled automaton. */
#  define CHR 1  /* literal character; the character follows      */
#  define ANY 2  /* any single character                          */
#  define CCL 3  /* character class; a BITBLK-byte bitset follows */
#  define BOL 4  /* beginning of line                             */
#  define EOL 5  /* end of line                                   */
#  define BOT 6  /* beginning of tag; the tag number follows      */
#  define EOT 7  /* end of tag; the tag number follows            */
#  define BOW 8  /* beginning of word                             */
#  define EOW 9  /* end of word                                   */
#  define REF 10 /* back-reference; the tag number follows        */
#  define CLO 11 /* closure of the operation that follows         */

#  define END 0 /* terminates the automaton and each closure body */

/*
 * The following defines are not meant to be changeable.
 * They are for readability only.
 *
 * BITBLK is parenthesized so that it stays a single operand wherever it
 * is used (for example "x % BITBLK" or "x / BITBLK").
 */
#  define MAXCHR 128
#  define CHRBIT 8
#  define BITBLK (MAXCHR / CHRBIT)
#  define BLKIND 0170
#  define BITIND 07

#  define ASCIIB 0177
242 
243 /* Plain char, on the other hand, may be signed or unsigned; it depends on
244  * the platform and perhaps a compiler option.  A hard fact of life, in C.
245  *
246  * 6-April-1999 mcs@netscape.com: replaced CHAR with REGEXCHAR to avoid
247  *              conflicts with system types on Win32.   Changed typedef
248  *              for REGEXCHAR to always be unsigned, which seems right.
249  */
250 typedef unsigned char REGEXCHAR;
251 
252 static int tagstk[MAXTAG];    /* subpat tag stack..*/
253 static REGEXCHAR nfa[MAXNFA]; /* automaton..       */
254 static int sta = NOP;         /* status of lastpat */
255 
256 static REGEXCHAR bittab[BITBLK]; /* bit table for CCL */
257                                  /* pre-set bits...   */
258 static REGEXCHAR bitarr[] = {1, 2, 4, 8, 16, 32, 64, 128};
259 
/*
 * chset: add character c to the class bitset being assembled in bittab.
 *
 * The bitset has one bit for each of the MAXCHR (128) ASCII codes only.
 * A character above ASCIIB is ignored: masking it into the table would
 * turn on the bit of an unrelated ASCII character (0xE9 would select
 * 'i'), and isinset() reports such characters as "not in the set"
 * regardless of the table contents.
 */
static void chset(REGEXCHAR c) {
  if (c > ASCIIB) {
    return;
  }
  bittab[(c & (unsigned)BLKIND) >> 3] |= bitarr[c & BITIND];
}
263 
264 #  define badpat(x) (*nfa = END, x)
265 #  define store(x) *mp++ = x
266 
re_comp(const char * pat)267 char* LDAP_CALL re_comp(const char* pat) {
268   register REGEXCHAR* p;        /* pattern pointer   */
269   register REGEXCHAR* mp = nfa; /* nfa pointer       */
270   register REGEXCHAR* lp;       /* saved pointer..   */
271   register REGEXCHAR* sp = nfa; /* another one..     */
272 
273   register int tagi = 0; /* tag stack index   */
274   register int tagc = 1; /* actual tag count  */
275 
276   register int n;
277   register REGEXCHAR mask; /* xor mask -CCL/NCL */
278   int c1, c2;
279 
280   if (!pat || !*pat) {
281     if (sta) {
282       return 0;
283     } else {
284       return badpat("No previous regular expression");
285     }
286   }
287   sta = NOP;
288 
289   for (p = (REGEXCHAR*)pat; *p; p++) {
290     lp = mp;
291     switch (*p) {
292       case '.': /* match any char..  */
293         store(ANY);
294         break;
295 
296       case '^': /* match beginning.. */
297         if (p == (REGEXCHAR*)pat)
298           store(BOL);
299         else {
300           store(CHR);
301           store(*p);
302         }
303         break;
304 
305       case '$': /* match endofline.. */
306         if (!*(p + 1))
307           store(EOL);
308         else {
309           store(CHR);
310           store(*p);
311         }
312         break;
313 
314       case '[': /* match char class..*/
315         store(CCL);
316 
317         if (*++p == '^') {
318           mask = 0377;
319           p++;
320         } else
321           mask = 0;
322 
323         if (*p == '-') /* real dash */
324           chset(*p++);
325         if (*p == ']') /* real brac */
326           chset(*p++);
327         while (*p && *p != ']') {
328           if (*p == '-' && *(p + 1) && *(p + 1) != ']') {
329             p++;
330             c1 = *(p - 2) + 1;
331             c2 = *p++;
332             while (c1 <= c2) chset((REGEXCHAR)c1++);
333           }
334 #  ifdef EXTEND
335           else if (*p == '\\' && *(p + 1)) {
336             p++;
337             chset(*p++);
338           }
339 #  endif
340           else
341             chset(*p++);
342         }
343         if (!*p) return badpat("Missing ]");
344 
345         for (n = 0; n < BITBLK; bittab[n++] = (REGEXCHAR)0)
346           store(mask ^ bittab[n]);
347 
348         break;
349 
350       case '*': /* match 0 or more.. */
351       case '+': /* match 1 or more.. */
352         if (p == (REGEXCHAR*)pat) return badpat("Empty closure");
353         lp = sp;        /* previous opcode */
354         if (*lp == CLO) /* equivalence..   */
355           break;
356         switch (*lp) {
357           case BOL:
358           case BOT:
359           case EOT:
360           case BOW:
361           case EOW:
362           case REF:
363             return badpat("Illegal closure");
364           default:
365             break;
366         }
367 
368         if (*p == '+')
369           for (sp = mp; lp < sp; lp++) store(*lp);
370 
371         store(END);
372         store(END);
373         sp = mp;
374         while (--mp > lp) *mp = mp[-1];
375         store(CLO);
376         mp = sp;
377         break;
378 
379       case '\\': /* tags, backrefs .. */
380         switch (*++p) {
381           case '(':
382             if (tagc < MAXTAG) {
383               tagstk[++tagi] = tagc;
384               store(BOT);
385               store(tagc++);
386             } else
387               return badpat("Too many \\(\\) pairs");
388             break;
389           case ')':
390             if (*sp == BOT) return badpat("Null pattern inside \\(\\)");
391             if (tagi > 0) {
392               store(EOT);
393               store(tagstk[tagi--]);
394             } else
395               return badpat("Unmatched \\)");
396             break;
397           case '<':
398             store(BOW);
399             break;
400           case '>':
401             if (*sp == BOW) return badpat("Null pattern inside \\<\\>");
402             store(EOW);
403             break;
404           case '1':
405           case '2':
406           case '3':
407           case '4':
408           case '5':
409           case '6':
410           case '7':
411           case '8':
412           case '9':
413             n = *p - '0';
414             if (tagi > 0 && tagstk[tagi] == n)
415               return badpat("Cyclical reference");
416             if (tagc > n) {
417               store(REF);
418               store(n);
419             } else
420               return badpat("Undetermined reference");
421             break;
422 #  ifdef EXTEND
423           case 'b':
424             store(CHR);
425             store('\b');
426             break;
427           case 'n':
428             store(CHR);
429             store('\n');
430             break;
431           case 'f':
432             store(CHR);
433             store('\f');
434             break;
435           case 'r':
436             store(CHR);
437             store('\r');
438             break;
439           case 't':
440             store(CHR);
441             store('\t');
442             break;
443 #  endif
444           default:
445             store(CHR);
446             store(*p);
447         }
448         break;
449 
450       default: /* an ordinary char  */
451         store(CHR);
452         store(*p);
453         break;
454     }
455     sp = lp;
456   }
457   if (tagi > 0) return badpat("Unmatched \\(");
458   store(END);
459   sta = OKP;
460   return 0;
461 }
462 
463 static REGEXCHAR* bol;
464 static REGEXCHAR* bopat[MAXTAG];
465 static REGEXCHAR* eopat[MAXTAG];
466 #  ifdef NEEDPROTOS
467 static REGEXCHAR* pmatch(REGEXCHAR* lp, REGEXCHAR* ap);
468 #  else  /* NEEDPROTOS */
469 static REGEXCHAR* pmatch();
470 #  endif /* NEEDPROTOS */
471 
472 /*
473  * re_exec:
474  *   execute nfa to find a match.
475  *
476  * special cases: (nfa[0])
477  *   BOL
478  *     Match only once, starting from the
479  *     beginning.
480  *   CHR
481  *     First locate the character without
482  *     calling pmatch, and if found, call
483  *     pmatch for the remaining string.
484  *   END
485  *     re_comp failed, poor luser did not
486  *     check for it. Fail fast.
487  *
488  * If a match is found, bopat[0] and eopat[0] are set
489  * to the beginning and the end of the matched fragment,
490  * respectively.
491  *
492  */
493 
re_exec(const char * lp)494 int LDAP_CALL re_exec(const char* lp) {
495   register REGEXCHAR c;
496   register REGEXCHAR* ep = 0;
497   register REGEXCHAR* ap = nfa;
498 
499   bol = (REGEXCHAR*)lp;
500 
501   bopat[0] = 0;
502   bopat[1] = 0;
503   bopat[2] = 0;
504   bopat[3] = 0;
505   bopat[4] = 0;
506   bopat[5] = 0;
507   bopat[6] = 0;
508   bopat[7] = 0;
509   bopat[8] = 0;
510   bopat[9] = 0;
511 
512   switch (*ap) {
513     case BOL: /* anchored: match from BOL only */
514       ep = pmatch((REGEXCHAR*)lp, ap);
515       break;
516     case CHR: /* ordinary char: locate it fast */
517       c = *(ap + 1);
518       while (*lp && *(REGEXCHAR*)lp != c) lp++;
519       if (!*lp) /* if EOS, fail, else fall through. */
520         return 0;
521     default: /* regular matching all the way. */
522       do {
523         if ((ep = pmatch((REGEXCHAR*)lp, ap))) break;
524         lp++;
525       } while (*lp);
526 
527       break;
528     case END: /* munged automaton. fail always */
529       return 0;
530   }
531   if (!ep) return 0;
532 
533   bopat[0] = (REGEXCHAR*)lp;
534   eopat[0] = ep;
535   return 1;
536 }
537 
538 /*
539  * pmatch: internal routine for the hard part
540  *
541  * This code is partly snarfed from an early grep written by
542  * David Conroy. The backref and tag stuff, and various other
543  * innovations are by oz.
544  *
545  * special case optimizations: (nfa[n], nfa[n+1])
546  *   CLO ANY
547  *     We KNOW .* will match everything up to the
548  *     end of line. Thus, directly go to the end of
549  *     line, without recursive pmatch calls. As in
550  *     the other closure cases, the remaining pattern
551  *     must be matched by moving backwards on the
552  *     string recursively, to find a match for xy
553  *     (x is ".*" and y is the remaining pattern)
554  *     where the match satisfies the LONGEST match for
555  *     x followed by a match for y.
556  *   CLO CHR
557  *     We can again scan the string forward for the
558  *     single char and at the point of failure, we
559  *     execute the remaining nfa recursively, same as
560  *     above.
561  *
562  * At the end of a successful match, bopat[n] and eopat[n]
563  * are set to the beginning and end of subpatterns matched
564  * by tagged expressions (n = 1 to 9).
565  *
566  */
567 
568 #  ifndef re_fail
569 extern void re_fail();
570 #  endif /* re_fail */
571 
572 /*
573  * character classification table for word boundary operators BOW
574  * and EOW. the reason for not using ctype macros is that we can
575  * let the user add into our own table. see re_modw. This table
576  * is not in the bitset form, since we may wish to extend it in the
577  * future for other character classifications.
578  *
579  * TRUE for 0-9 A-Z a-z _
580  */
581 static char chrtyp[MAXCHR] = {
582     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
583     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584     0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
585     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
586     1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
587     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};
588 
589 #  define HIBIT 0200
590 #  define inascii(x) (0177 & (x))
591 #  define iswordc(x) chrtyp[inascii(x)]
592 #  define isinset(x, y) \
593     (((y)&HIBIT) ? 0 : ((x)[((y)&BLKIND) >> 3] & bitarr[(y)&BITIND]))
594 
595 /*
596  * skip values for CLO XXX to skip past the closure
597  */
598 
599 #  define ANYSKIP 2  /* [CLO] ANY END ...       */
600 #  define CHRSKIP 3  /* [CLO] CHR chr END ...     */
601 #  define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */
602 
/*
 * pmatch: match the automaton starting at ap against the text at lp.
 *
 * lp   current position in the NUL-terminated text
 * ap   current position in the automaton
 *
 * Returns the text position just after the matched portion, or 0 when
 * the automaton does not match at lp.  bopat[n] / eopat[n] are recorded
 * as BOT / EOT opcodes are passed.
 *
 * A closure first consumes as much text as its operand allows and then
 * gives characters back one at a time until the rest of the automaton
 * matches.
 */
static REGEXCHAR* pmatch(REGEXCHAR* lp, REGEXCHAR* ap) {
  int op, c, n;
  REGEXCHAR* e;   /* result of matching what follows a closure */
  REGEXCHAR* bp;  /* beginning of subpat..                     */
  REGEXCHAR* ep;  /* ending of subpat..                        */
  REGEXCHAR* are; /* text position where the closure started   */

  while ((op = *ap++) != END) {
    switch (op) {
      case CHR:
        if (*lp++ != *ap++) {
          return 0;
        }
        break;
      case ANY:
        if (!*lp++) {
          return 0;
        }
        break;
      case CCL:
        c = *lp++;
        /*
         * The terminating NUL is never a member.  A complemented class
         * has bit 0 set, so without this test it would consume the
         * terminator and matching would continue beyond the text.
         */
        if (!c || !isinset(ap, c)) {
          return 0;
        }
        ap += BITBLK;
        break;
      case BOL:
        if (lp != bol) {
          return 0;
        }
        break;
      case EOL:
        if (*lp) {
          return 0;
        }
        break;
      case BOT:
        bopat[*ap++] = lp;
        break;
      case EOT:
        eopat[*ap++] = lp;
        break;
      case BOW:
        if ((lp != bol && iswordc(lp[-1])) || !iswordc(*lp)) {
          return 0;
        }
        break;
      case EOW:
        if (lp == bol || !iswordc(lp[-1]) || iswordc(*lp)) {
          return 0;
        }
        break;
      case REF:
        n = *ap++;
        bp = bopat[n];
        ep = eopat[n];
        while (bp < ep) {
          if (*bp++ != *lp++) {
            return 0;
          }
        }
        break;
      case CLO:
        are = lp;
        /* consume the longest run the operand accepts */
        switch (*ap) {
          case ANY:
            while (*lp) {
              lp++;
            }
            n = ANYSKIP;
            break;
          case CHR:
            c = *(ap + 1);
            while (*lp && c == *lp) {
              lp++;
            }
            n = CHRSKIP;
            break;
          case CCL:
            while ((c = *lp) && isinset(ap + 1, c)) {
              lp++;
            }
            n = CCLSKIP;
            break;
          default:
            re_fail("closure: bad nfa.", *ap);
            return 0;
        }

        ap += n;

        /*
         * Back off one character at a time, never moving below the
         * position where the closure began.
         */
        for (;;) {
          if ((e = pmatch(lp, ap)) != 0) {
            return e;
          }
          if (lp == are) {
            return 0;
          }
          --lp;
        }
      default:
        re_fail("re_exec: bad nfa.", op);
        return 0;
    }
  }
  return lp;
}
681 
682 /*
683  * re_modw:
684  * add new characters into the word table to change re_exec's
685  * understanding of what a word should look like. Note that we
686  * only accept additions into the word definition.
687  *
688  * If the string parameter is 0 or null string, the table is
689  * reset back to the default containing A-Z a-z 0-9 _. [We use
690  * the compact bitset representation for the default table]
691  */
692 
693 static REGEXCHAR deftab[16] = {0,    0,    0,    0,    0,    0,    0377, 003,
694                                0376, 0377, 0377, 0207, 0376, 0377, 0377, 007};
695 
re_modw(char * s)696 void LDAP_CALL re_modw(char* s) {
697   register int i;
698 
699   if (!s || !*s) {
700     for (i = 0; i < MAXCHR; i++)
701       if (!isinset(deftab, i)) iswordc(i) = 0;
702   } else
703     while (*s) iswordc(*s++) = 1;
704 }
705 
706 /*
707  * re_subs:
708  * substitute the matched portions of the src in dst.
709  *
710  * &  substitute the entire matched pattern.
711  *
712  * \digit  substitute a subpattern, with the given  tag number.
713  *   Tags are numbered from 1 to 9. If the particular
714  *   tagged subpattern does not exist, null is substituted.
715  */
re_subs(char * src,char * dst)716 int LDAP_CALL re_subs(char* src, char* dst) {
717   register char c;
718   register int pin;
719   register REGEXCHAR* bp;
720   register REGEXCHAR* ep;
721 
722   if (!*src || !bopat[0]) return 0;
723 
724   while ((c = *src++)) {
725     switch (c) {
726       case '&':
727         pin = 0;
728         break;
729 
730       case '\\':
731         c = *src++;
732         if (c >= '0' && c <= '9') {
733           pin = c - '0';
734           break;
735         }
736 
737       default:
738         *dst++ = c;
739         continue;
740     }
741 
742     if ((bp = bopat[pin]) && (ep = eopat[pin])) {
743       while (*bp && bp < ep) *dst++ = *(char*)bp++;
744       if (bp < ep) return 0;
745     }
746   }
747   *dst = (char)0;
748   return 1;
749 }
750 
751 #  ifdef DEBUG
752 
/* 16-bit Windows has neither printf nor exit; supply stand-ins. */
#    if defined(_WINDOWS) && !defined(_WIN32)
/*
 * printf stand-in: format the message into a local buffer and pass it to
 * OutputDebugString.  Always returns 0.
 */
static int LDAP_C printf(const char* pszFormat, ...) {
  va_list args;
  char text[1024];

  va_start(args, pszFormat);
  vsprintf(text, pszFormat, args);
  va_end(args);

  OutputDebugString(text);
  return 0;
}
/* exit stand-in: simply leave the calling function */
#      define exit(v) return
#    endif /* 16-bit Windows */
766 
767 #    ifdef REGEX_DEBUG
768 
769 static void nfadump(REGEXCHAR* ap);
770 
771 /*
772  * symbolic - produce a symbolic dump of the nfa
773  */
symbolic(char * s)774 void symbolic(char* s) {
775   printf("pattern: %s\n", s);
776   printf("nfacode:\n");
777   nfadump(nfa);
778 }
779 
/*
 * nfadump - print one line per opcode of the automaton starting at ap,
 * up to (not including) the next END.
 *
 * A closure is printed by dumping its operand recursively and then
 * skipping over it.  An unknown opcode is reported and the dump is
 * abandoned, including when the unknown opcode is a closure operand.
 */
static void nfadump(REGEXCHAR* ap) {
  int n;

  while (*ap != END) {
    switch (*ap++) {
      case CLO:
        printf("CLOSURE");
        nfadump(ap);
        switch (*ap) {
          case CHR:
            n = CHRSKIP;
            break;
          case ANY:
            n = ANYSKIP;
            break;
          case CCL:
            n = CCLSKIP;
            break;
          default:
            /* no known skip length: do not advance by garbage */
            printf("bad nfa. opcode %o\n", (unsigned)*ap);
            exit(1);
            return;
        }
        ap += n;
        break;
      case CHR:
        printf("\tCHR %c\n", *ap++);
        break;
      case ANY:
        printf("\tANY .\n");
        break;
      case BOL:
        printf("\tBOL -\n");
        break;
      case EOL:
        printf("\tEOL -\n");
        break;
      case BOT:
        printf("BOT: %d\n", *ap++);
        break;
      case EOT:
        printf("EOT: %d\n", *ap++);
        break;
      case BOW:
        printf("BOW\n");
        break;
      case EOW:
        printf("EOW\n");
        break;
      case REF:
        printf("REF: %d\n", *ap++);
        break;
      case CCL:
        printf("\tCCL [");
        for (n = 0; n < MAXCHR; n++) {
          if (isinset(ap, (REGEXCHAR)n)) {
            if (n < ' ') {
              /* show control characters in caret notation */
              printf("^%c", n ^ 0x040);
            } else {
              printf("%c", n);
            }
          }
        }
        printf("]\n");
        ap += BITBLK;
        break;
      default:
        printf("bad nfa. opcode %o\n", (unsigned)ap[-1]);
        exit(1);
        break;
    }
  }
}
845 #    endif /* REGEX_DEBUG */
846 #  endif   /* DEBUG */
847 #endif     /* macintosh or DOS or _WINDOWS or NEED_BSDREGEX */
848