1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Original Code is Mozilla Communicator client code, released
15 * March 31, 1998.
16 *
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998-1999
20 * the Initial Developer. All Rights Reserved.
21 *
22 * Contributor(s):
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37 #include "ldap-int.h"
38 #if defined(macintosh) || defined(DOS) || defined(_WINDOWS) || \
39 defined(NEED_BSDREGEX) || defined(XP_OS2)
40 # include "regex.h"
41
42 /*
43 * regex - Regular expression pattern matching and replacement
44 *
45 * By: Ozan S. Yigit (oz)
46 * Dept. of Computer Science
47 * York University
48 *
49 * These routines are the PUBLIC DOMAIN equivalents of regex
50 * routines as found in 4.nBSD UN*X, with minor extensions.
51 *
52 * These routines are derived from various implementations found
53 * in software tools books, and Conroy's grep. They are NOT derived
54 * from licensed/restricted software.
55 * For more interesting/academic/complicated implementations,
56 * see Henry Spencer's regexp routines, or GNU Emacs pattern
57 * matching module.
58 *
59 * Use the actual CCL code in the CLO
60 * section of pmatch. No need for a recursive
61 * pmatch call.
62 *
63 * Use a bitmap table to set char bits in an
64 * 8-bit chunk.
65 *
66 * Interfaces:
67 * re_comp: compile a regular expression into a NFA.
68 *
69 * char *re_comp(s)
70 * char *s;
71 *
72 * re_exec: execute the NFA to match a pattern.
73 *
74 * int re_exec(s)
75 * char *s;
76 *
77 * re_modw change re_exec's understanding of what a "word"
78 * looks like (for \< and \>) by adding into the
79 * hidden word-syntax table.
80 *
81 * void re_modw(s)
82 * char *s;
83 *
84 * re_subs: substitute the matched portions in a new string.
85 *
86 * int re_subs(src, dst)
87 * char *src;
88 * char *dst;
89 *
90 * re_fail: failure routine for re_exec.
91 *
92 * void re_fail(msg, op)
93 * char *msg;
94 * char op;
95 *
96 * Regular Expressions:
97 *
98 * [1] char matches itself, unless it is a special
99 * character (metachar): . \ [ ] * + ^ $
100 *
101 * [2] . matches any character.
102 *
103 * [3] \ matches the character following it, except
104 * when followed by a left or right round bracket,
105 * a digit 1 to 9 or a left or right angle bracket.
106 * (see [7], [8] and [9])
107 * It is used as an escape character for all
108 * other meta-characters, and itself. When used
109 * in a set ([4]), it is treated as an ordinary
110 * character.
111 *
112 * [4] [set] matches one of the characters in the set.
113 * If the first character in the set is "^",
114 * it matches a character NOT in the set, i.e.
115 * complements the set. A shorthand S-E is
116 * used to specify a set of characters S up to
117 * E, inclusive. The special characters "]" and
118 * "-" have no special meaning if they appear
119 * as the first chars in the set.
120 * examples: match:
121 *
122 * [a-z] any lowercase alpha
123 *
124 * [^]-] any char except ] and -
125 *
126 * [^A-Z] any char except uppercase
127 * alpha
128 *
129 * [a-zA-Z] any alpha
130 *
131 * [5] * any regular expression form [1] to [4], followed by
132 * closure char (*) matches zero or more matches of
133 * that form.
134 *
135 * [6] + same as [5], except it matches one or more.
136 *
137 * [7] a regular expression in the form [1] to [10], enclosed
138 * as \(form\) matches what form matches. The enclosure
139 * creates a set of tags, used for [8] and for
 * pattern substitution. The tagged forms are numbered
141 * starting from 1.
142 *
143 * [8] a \ followed by a digit 1 to 9 matches whatever a
144 * previously tagged regular expression ([7]) matched.
145 *
146 * [9] \< a regular expression starting with a \< construct
147 * \> and/or ending with a \> construct, restricts the
148 * pattern matching to the beginning of a word, and/or
149 * the end of a word. A word is defined to be a character
150 * string beginning and/or ending with the characters
151 * A-Z a-z 0-9 and _. It must also be preceded and/or
152 * followed by any character outside those mentioned.
153 *
154 * [10] a composite regular expression xy where x and y
155 * are in the form [1] to [10] matches the longest
156 * match of x followed by a match for y.
157 *
158 * [11] ^ a regular expression starting with a ^ character
159 * $ and/or ending with a $ character, restricts the
160 * pattern matching to the beginning of the line,
161 * or the end of line. [anchors] Elsewhere in the
162 * pattern, ^ and $ are treated as ordinary characters.
163 *
164 *
165 * Acknowledgements:
166 *
167 * HCR's Hugh Redelmeier has been most helpful in various
168 * stages of development. He convinced me to include BOW
169 * and EOW constructs, originally invented by Rob Pike at
170 * the University of Toronto.
171 *
172 * References:
173 * Software tools Kernighan & Plauger
174 * Software tools in Pascal Kernighan & Plauger
175 * Grep [rsx-11 C dist] David Conroy
176 * ed - text editor Un*x Programmer's Manual
177 * Advanced editing on Un*x B. W. Kernighan
178 * RegExp routines Henry Spencer
179 *
180 * Notes:
181 *
182 * This implementation uses a bit-set representation for character
183 * classes for speed and compactness. Each character is represented
184 * by one bit in a 128-bit block. Thus, CCL always takes a
185 * constant 16 bytes in the internal nfa, and re_exec does a single
186 * bit comparison to locate the character in the set.
187 *
188 * Examples:
189 *
190 * pattern: foo*.*
191 * compile: CHR f CHR o CLO CHR o END CLO ANY END END
192 * matches: fo foo fooo foobar fobar foxx ...
193 *
194 * pattern: fo[ob]a[rz]
195 * compile: CHR f CHR o CCL bitset CHR a CCL bitset END
196 * matches: fobar fooar fobaz fooaz
197 *
198 * pattern: foo\\+
199 * compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END
200 * matches: foo\ foo\\ foo\\\ ...
201 *
202 * pattern: \(foo\)[1-3]\1 (same as foo[1-3]foo)
203 * compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END
204 * matches: foo1foo foo2foo foo3foo
205 *
206 * pattern: \(fo.*\)-\1
207 * compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END
208 * matches: foo-foo fo-fo fob-fob foobar-foobar ...
209 */
210
# define MAXNFA 1024 /* size of the compiled-pattern buffer, in bytes */
# define MAXTAG 10   /* number of tag slots (0 .. 9) */

# define OKP 1 /* status: a compiled pattern is available */
# define NOP 0 /* status: no compiled pattern is available */

/*
 * Opcodes of the compiled pattern. Operands, where present, directly
 * follow the opcode byte.
 */
# define CHR 1  /* match one literal character; operand: the character */
# define ANY 2  /* match any one character */
# define CCL 3  /* match a character class; operand: BITBLK-byte bitset */
# define BOL 4  /* match at the beginning of the line */
# define EOL 5  /* match at the end of the line */
# define BOT 6  /* beginning of a tagged subpattern; operand: tag number */
# define EOT 7  /* end of a tagged subpattern; operand: tag number */
# define BOW 8  /* match at the beginning of a word */
# define EOW 9  /* match at the end of a word */
# define REF 10 /* match the text of an earlier tag; operand: tag number */
# define CLO 11 /* closure over the CHR, ANY or CCL that follows, up to END */

# define END 0 /* terminates a closure body and the whole program */

/*
 * The following defines are not meant to be changeable.
 * They are for readability only.
 */
# define MAXCHR 128 /* characters covered by a class bitset */
# define CHRBIT 8   /* bits per bitset byte */
/* Bytes per class bitset. Parenthesized so that the macro expands as a
 * single operand in any expression (e.g. x / BITBLK). */
# define BITBLK (MAXCHR / CHRBIT)
# define BLKIND 0170 /* mask for the bits of a character that pick the bitset byte */
# define BITIND 07   /* mask for the bits of a character that pick the bit */

# define ASCIIB 0177
242
243 /* Plain char, on the other hand, may be signed or unsigned; it depends on
244 * the platform and perhaps a compiler option. A hard fact of life, in C.
245 *
246 * 6-April-1999 mcs@netscape.com: replaced CHAR with REGEXCHAR to avoid
247 * conflicts with system types on Win32. Changed typedef
248 * for REGEXCHAR to always be unsigned, which seems right.
249 */
/* Character type used for both pattern text and compiled code; always
 * unsigned so that values can index tables directly. */
typedef unsigned char REGEXCHAR;

/* State shared by successive re_comp/re_exec calls. */
static int tagstk[MAXTAG];    /* subpat tag stack: tags opened, not yet closed */
static REGEXCHAR nfa[MAXNFA]; /* automaton: the compiled pattern */
static int sta = NOP;         /* status of lastpat: OKP after a good compile */

static REGEXCHAR bittab[BITBLK]; /* bit table for the CCL being compiled */
/* pre-set bits: bitarr[i] has only bit i set */
static REGEXCHAR bitarr[] = {1, 2, 4, 8, 16, 32, 64, 128};
259
/*
 * chset: mark character c as a member of the class under construction.
 * The slot is chosen by masking c with BLKIND/BITIND, so only the low
 * seven bits of c take part.
 */
static void chset(REGEXCHAR c) {
  const unsigned blk = ((unsigned)c & (unsigned)BLKIND) >> 3;
  const unsigned bit = (unsigned)c & (unsigned)BITIND;

  bittab[blk] |= bitarr[bit];
}
263
264 # define badpat(x) (*nfa = END, x)
265 # define store(x) *mp++ = x
266
re_comp(const char * pat)267 char* LDAP_CALL re_comp(const char* pat) {
268 register REGEXCHAR* p; /* pattern pointer */
269 register REGEXCHAR* mp = nfa; /* nfa pointer */
270 register REGEXCHAR* lp; /* saved pointer.. */
271 register REGEXCHAR* sp = nfa; /* another one.. */
272
273 register int tagi = 0; /* tag stack index */
274 register int tagc = 1; /* actual tag count */
275
276 register int n;
277 register REGEXCHAR mask; /* xor mask -CCL/NCL */
278 int c1, c2;
279
280 if (!pat || !*pat) {
281 if (sta) {
282 return 0;
283 } else {
284 return badpat("No previous regular expression");
285 }
286 }
287 sta = NOP;
288
289 for (p = (REGEXCHAR*)pat; *p; p++) {
290 lp = mp;
291 switch (*p) {
292 case '.': /* match any char.. */
293 store(ANY);
294 break;
295
296 case '^': /* match beginning.. */
297 if (p == (REGEXCHAR*)pat)
298 store(BOL);
299 else {
300 store(CHR);
301 store(*p);
302 }
303 break;
304
305 case '$': /* match endofline.. */
306 if (!*(p + 1))
307 store(EOL);
308 else {
309 store(CHR);
310 store(*p);
311 }
312 break;
313
314 case '[': /* match char class..*/
315 store(CCL);
316
317 if (*++p == '^') {
318 mask = 0377;
319 p++;
320 } else
321 mask = 0;
322
323 if (*p == '-') /* real dash */
324 chset(*p++);
325 if (*p == ']') /* real brac */
326 chset(*p++);
327 while (*p && *p != ']') {
328 if (*p == '-' && *(p + 1) && *(p + 1) != ']') {
329 p++;
330 c1 = *(p - 2) + 1;
331 c2 = *p++;
332 while (c1 <= c2) chset((REGEXCHAR)c1++);
333 }
334 # ifdef EXTEND
335 else if (*p == '\\' && *(p + 1)) {
336 p++;
337 chset(*p++);
338 }
339 # endif
340 else
341 chset(*p++);
342 }
343 if (!*p) return badpat("Missing ]");
344
345 for (n = 0; n < BITBLK; bittab[n++] = (REGEXCHAR)0)
346 store(mask ^ bittab[n]);
347
348 break;
349
350 case '*': /* match 0 or more.. */
351 case '+': /* match 1 or more.. */
352 if (p == (REGEXCHAR*)pat) return badpat("Empty closure");
353 lp = sp; /* previous opcode */
354 if (*lp == CLO) /* equivalence.. */
355 break;
356 switch (*lp) {
357 case BOL:
358 case BOT:
359 case EOT:
360 case BOW:
361 case EOW:
362 case REF:
363 return badpat("Illegal closure");
364 default:
365 break;
366 }
367
368 if (*p == '+')
369 for (sp = mp; lp < sp; lp++) store(*lp);
370
371 store(END);
372 store(END);
373 sp = mp;
374 while (--mp > lp) *mp = mp[-1];
375 store(CLO);
376 mp = sp;
377 break;
378
379 case '\\': /* tags, backrefs .. */
380 switch (*++p) {
381 case '(':
382 if (tagc < MAXTAG) {
383 tagstk[++tagi] = tagc;
384 store(BOT);
385 store(tagc++);
386 } else
387 return badpat("Too many \\(\\) pairs");
388 break;
389 case ')':
390 if (*sp == BOT) return badpat("Null pattern inside \\(\\)");
391 if (tagi > 0) {
392 store(EOT);
393 store(tagstk[tagi--]);
394 } else
395 return badpat("Unmatched \\)");
396 break;
397 case '<':
398 store(BOW);
399 break;
400 case '>':
401 if (*sp == BOW) return badpat("Null pattern inside \\<\\>");
402 store(EOW);
403 break;
404 case '1':
405 case '2':
406 case '3':
407 case '4':
408 case '5':
409 case '6':
410 case '7':
411 case '8':
412 case '9':
413 n = *p - '0';
414 if (tagi > 0 && tagstk[tagi] == n)
415 return badpat("Cyclical reference");
416 if (tagc > n) {
417 store(REF);
418 store(n);
419 } else
420 return badpat("Undetermined reference");
421 break;
422 # ifdef EXTEND
423 case 'b':
424 store(CHR);
425 store('\b');
426 break;
427 case 'n':
428 store(CHR);
429 store('\n');
430 break;
431 case 'f':
432 store(CHR);
433 store('\f');
434 break;
435 case 'r':
436 store(CHR);
437 store('\r');
438 break;
439 case 't':
440 store(CHR);
441 store('\t');
442 break;
443 # endif
444 default:
445 store(CHR);
446 store(*p);
447 }
448 break;
449
450 default: /* an ordinary char */
451 store(CHR);
452 store(*p);
453 break;
454 }
455 sp = lp;
456 }
457 if (tagi > 0) return badpat("Unmatched \\(");
458 store(END);
459 sta = OKP;
460 return 0;
461 }
462
463 static REGEXCHAR* bol;
464 static REGEXCHAR* bopat[MAXTAG];
465 static REGEXCHAR* eopat[MAXTAG];
466 # ifdef NEEDPROTOS
467 static REGEXCHAR* pmatch(REGEXCHAR* lp, REGEXCHAR* ap);
468 # else /* NEEDPROTOS */
469 static REGEXCHAR* pmatch();
470 # endif /* NEEDPROTOS */
471
472 /*
473 * re_exec:
474 * execute nfa to find a match.
475 *
476 * special cases: (nfa[0])
477 * BOL
478 * Match only once, starting from the
479 * beginning.
480 * CHR
481 * First locate the character without
482 * calling pmatch, and if found, call
483 * pmatch for the remaining string.
484 * END
485 * re_comp failed, poor luser did not
486 * check for it. Fail fast.
487 *
488 * If a match is found, bopat[0] and eopat[0] are set
489 * to the beginning and the end of the matched fragment,
490 * respectively.
491 *
492 */
493
re_exec(const char * lp)494 int LDAP_CALL re_exec(const char* lp) {
495 register REGEXCHAR c;
496 register REGEXCHAR* ep = 0;
497 register REGEXCHAR* ap = nfa;
498
499 bol = (REGEXCHAR*)lp;
500
501 bopat[0] = 0;
502 bopat[1] = 0;
503 bopat[2] = 0;
504 bopat[3] = 0;
505 bopat[4] = 0;
506 bopat[5] = 0;
507 bopat[6] = 0;
508 bopat[7] = 0;
509 bopat[8] = 0;
510 bopat[9] = 0;
511
512 switch (*ap) {
513 case BOL: /* anchored: match from BOL only */
514 ep = pmatch((REGEXCHAR*)lp, ap);
515 break;
516 case CHR: /* ordinary char: locate it fast */
517 c = *(ap + 1);
518 while (*lp && *(REGEXCHAR*)lp != c) lp++;
519 if (!*lp) /* if EOS, fail, else fall through. */
520 return 0;
521 default: /* regular matching all the way. */
522 do {
523 if ((ep = pmatch((REGEXCHAR*)lp, ap))) break;
524 lp++;
525 } while (*lp);
526
527 break;
528 case END: /* munged automaton. fail always */
529 return 0;
530 }
531 if (!ep) return 0;
532
533 bopat[0] = (REGEXCHAR*)lp;
534 eopat[0] = ep;
535 return 1;
536 }
537
538 /*
539 * pmatch: internal routine for the hard part
540 *
541 * This code is partly snarfed from an early grep written by
542 * David Conroy. The backref and tag stuff, and various other
543 * innovations are by oz.
544 *
545 * special case optimizations: (nfa[n], nfa[n+1])
546 * CLO ANY
547 * We KNOW .* will match everything up to the
548 * end of line. Thus, directly go to the end of
549 * line, without recursive pmatch calls. As in
550 * the other closure cases, the remaining pattern
551 * must be matched by moving backwards on the
552 * string recursively, to find a match for xy
553 * (x is ".*" and y is the remaining pattern)
554 * where the match satisfies the LONGEST match for
555 * x followed by a match for y.
556 * CLO CHR
557 * We can again scan the string forward for the
558 * single char and at the point of failure, we
559 * execute the remaining nfa recursively, same as
560 * above.
561 *
562 * At the end of a successful match, bopat[n] and eopat[n]
563 * are set to the beginning and end of subpatterns matched
564 * by tagged expressions (n = 1 to 9).
565 *
566 */
567
/*
 * re_fail is called by pmatch with a message and the offending opcode when
 * it meets a malformed nfa. It is declared here unless re_fail is already
 * defined as a macro; no definition is visible in this part of the file.
 */
# ifndef re_fail
extern void re_fail();
# endif /* re_fail */
571
572 /*
573 * character classification table for word boundary operators BOW
574 * and EOW. the reason for not using ctype macros is that we can
575 * let the user add into our own table. see re_modw. This table
576 * is not in the bitset form, since we may wish to extend it in the
577 * future for other character classifications.
578 *
579 * TRUE for 0-9 A-Z a-z _
580 */
581 static char chrtyp[MAXCHR] = {
582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
585 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
586 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
587 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};
588
589 # define HIBIT 0200
590 # define inascii(x) (0177 & (x))
591 # define iswordc(x) chrtyp[inascii(x)]
592 # define isinset(x, y) \
593 (((y)&HIBIT) ? 0 : ((x)[((y)&BLKIND) >> 3] & bitarr[(y)&BITIND]))
594
595 /*
596 * skip values for CLO XXX to skip past the closure
597 */
598
599 # define ANYSKIP 2 /* [CLO] ANY END ... */
600 # define CHRSKIP 3 /* [CLO] CHR chr END ... */
601 # define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */
602
/*
 * pmatch: match the compiled pattern at ap against the text at lp.
 *
 *	lp	position in the subject string to match from
 *	ap	position in the nfa to execute from
 *
 *	Returns the position just past the matched text, or 0 when the
 *	pattern does not match at lp. Tag boundaries met on the way are
 *	recorded in bopat[] and eopat[].
 */
static REGEXCHAR* pmatch(REGEXCHAR* lp, REGEXCHAR* ap) {
  int op, c, n;
  REGEXCHAR* e;   /* result of matching the remainder after a closure */
  REGEXCHAR* bp;  /* beginning of subpat.. */
  REGEXCHAR* ep;  /* ending of subpat.. */
  REGEXCHAR* are; /* subject position where the closure started */

  while ((op = *ap++) != END) {
    switch (op) {
      case CHR:
        if (*lp++ != *ap++) return 0;
        break;
      case ANY:
        if (!*lp++) return 0;
        break;
      case CCL:
        c = *lp++;
        /* Never let a class consume the string terminator: negated
         * classes have the bit for NUL set, and accepting it would
         * move lp past the end of the subject. */
        if (!c || !isinset(ap, c)) return 0;
        ap += BITBLK;
        break;
      case BOL:
        if (lp != bol) return 0;
        break;
      case EOL:
        if (*lp) return 0;
        break;
      case BOT:
        bopat[*ap++] = lp;
        break;
      case EOT:
        eopat[*ap++] = lp;
        break;
      case BOW:
        if ((lp != bol && iswordc(lp[-1])) || !iswordc(*lp)) return 0;
        break;
      case EOW:
        if (lp == bol || !iswordc(lp[-1]) || iswordc(*lp)) return 0;
        break;
      case REF:
        n = *ap++;
        bp = bopat[n];
        ep = eopat[n];
        /* a tag without both ends recorded contributes no text */
        if (bp && ep) {
          while (bp < ep)
            if (*bp++ != *lp++) return 0;
        }
        break;
      case CLO:
        are = lp;
        /* take the longest run first ... */
        switch (*ap) {
          case ANY:
            while (*lp) lp++;
            n = ANYSKIP;
            break;
          case CHR:
            c = *(ap + 1);
            while (*lp && c == *lp) lp++;
            n = CHRSKIP;
            break;
          case CCL:
            while ((c = *lp) && isinset(ap + 1, c)) lp++;
            n = CCLSKIP;
            break;
          default:
            re_fail("closure: bad nfa.", *ap);
            return 0;
        }

        ap += n;

        /* ... then give characters back one at a time until the rest
         * of the pattern matches, without moving lp below are */
        for (;;) {
          if ((e = pmatch(lp, ap))) return e;
          if (lp == are) return 0;
          --lp;
        }
      default:
        re_fail("re_exec: bad nfa.", op);
        return 0;
    }
  }
  return lp;
}
681
682 /*
683 * re_modw:
684 * add new characters into the word table to change re_exec's
685 * understanding of what a word should look like. Note that we
686 * only accept additions into the word definition.
687 *
688 * If the string parameter is 0 or null string, the table is
689 * reset back to the default containing A-Z a-z 0-9 _. [We use
690 * the compact bitset representation for the default table]
691 */
692
693 static REGEXCHAR deftab[16] = {0, 0, 0, 0, 0, 0, 0377, 003,
694 0376, 0377, 0377, 0207, 0376, 0377, 0377, 007};
695
re_modw(char * s)696 void LDAP_CALL re_modw(char* s) {
697 register int i;
698
699 if (!s || !*s) {
700 for (i = 0; i < MAXCHR; i++)
701 if (!isinset(deftab, i)) iswordc(i) = 0;
702 } else
703 while (*s) iswordc(*s++) = 1;
704 }
705
706 /*
707 * re_subs:
708 * substitute the matched portions of the src in dst.
709 *
710 * & substitute the entire matched pattern.
711 *
712 * \digit substitute a subpattern, with the given tag number.
713 * Tags are numbered from 1 to 9. If the particular
714 * tagged subpattern does not exist, null is substituted.
715 */
re_subs(char * src,char * dst)716 int LDAP_CALL re_subs(char* src, char* dst) {
717 register char c;
718 register int pin;
719 register REGEXCHAR* bp;
720 register REGEXCHAR* ep;
721
722 if (!*src || !bopat[0]) return 0;
723
724 while ((c = *src++)) {
725 switch (c) {
726 case '&':
727 pin = 0;
728 break;
729
730 case '\\':
731 c = *src++;
732 if (c >= '0' && c <= '9') {
733 pin = c - '0';
734 break;
735 }
736
737 default:
738 *dst++ = c;
739 continue;
740 }
741
742 if ((bp = bopat[pin]) && (ep = eopat[pin])) {
743 while (*bp && bp < ep) *dst++ = *(char*)bp++;
744 if (bp < ep) return 0;
745 }
746 }
747 *dst = (char)0;
748 return 1;
749 }
750
751 # ifdef DEBUG
752
753 /* No printf or exit in 16-bit Windows */
754 # if defined(_WINDOWS) && !defined(_WIN32)
/*
 * Replacement for printf: formats the arguments into a local buffer and
 * hands the text to OutputDebugString. Always returns 0.
 * NOTE(review): vsprintf is unbounded, so output longer than the
 * 1024-byte buffer would overrun it.
 */
static int LDAP_C printf(const char* pszFormat, ...) {
  char buf[1024];
  va_list arglist;
  va_start(arglist, pszFormat);
  vsprintf(buf, pszFormat, arglist);
  va_end(arglist);
  OutputDebugString(buf);
  return 0;
}
/* exit(v) becomes a plain return from the calling function */
# define exit(v) return
765 # endif /* 16-bit Windows */
766
767 # ifdef REGEX_DEBUG
768
769 static void nfadump(REGEXCHAR* ap);
770
771 /*
772 * symbolic - produce a symbolic dump of the nfa
773 */
symbolic(char * s)774 void symbolic(char* s) {
775 printf("pattern: %s\n", s);
776 printf("nfacode:\n");
777 nfadump(nfa);
778 }
779
/*
 * nfadump: print the opcodes from ap up to the next END, one per line.
 * A closure is printed by dumping its body recursively and then skipping
 * over it. An unknown opcode is reported and ends the dump through exit.
 */
static void nfadump(REGEXCHAR* ap) {
  int n;

  while (*ap != END) {
    switch (*ap++) {
      case CLO:
        printf("CLOSURE");
        nfadump(ap);
        switch (*ap) {
          case CHR:
            n = CHRSKIP;
            break;
          case ANY:
            n = ANYSKIP;
            break;
          case CCL:
            n = CCLSKIP;
            break;
          default:
            /* no known closure body: without this n would be used
             * uninitialized below */
            printf("bad nfa. opcode %o\n", *ap);
            exit(1);
            return;
        }
        ap += n;
        break;
      case CHR:
        printf("\tCHR %c\n", *ap++);
        break;
      case ANY:
        printf("\tANY .\n");
        break;
      case BOL:
        printf("\tBOL -\n");
        break;
      case EOL:
        printf("\tEOL -\n");
        break;
      case BOT:
        printf("BOT: %d\n", *ap++);
        break;
      case EOT:
        printf("EOT: %d\n", *ap++);
        break;
      case BOW:
        printf("BOW\n");
        break;
      case EOW:
        printf("EOW\n");
        break;
      case REF:
        printf("REF: %d\n", *ap++);
        break;
      case CCL:
        printf("\tCCL [");
        for (n = 0; n < MAXCHR; n++) {
          if (!isinset(ap, (REGEXCHAR)n)) continue;
          if (n < ' ')
            printf("^%c", n ^ 0x040); /* control char in caret notation */
          else
            printf("%c", n);
        }
        printf("]\n");
        ap += BITBLK;
        break;
      default:
        printf("bad nfa. opcode %o\n", ap[-1]);
        exit(1);
        break;
    }
  }
}
845 # endif /* REGEX_DEBUG */
846 # endif /* DEBUG */
847 #endif /* macintosh or DOS or _WINDOWS or NEED_BSDREGEX */
848