1 /*
2 regexpr.c
3
4 Author: Tatu Ylonen <ylo@ngs.fi>
5
6 Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
7
8 Permission to use, copy, modify, distribute, and sell this software
9 and its documentation is hereby granted without fee, provided that the
10 above copyright notice appears in all source code copies, the name of
11 Tatu Ylonen is not used to advertise products containing this software
12 or a derivation thereof, and all modified versions are clearly marked
13 as such.
14
15 This software is provided "as is" without express or implied warranty.
16
17 Created: Thu Sep 26 17:14:05 1991 ylo
18 Last modified: Sun Mar 29 16:47:31 1992 ylo
19
20 This code draws many ideas from the regular expression packages by
21 Henry Spencer of the University of Toronto and Richard Stallman of the
22 Free Software Foundation.
23
24 Emacs-specific code and syntax table code is almost directly borrowed
25 from GNU regexp.
26
27 $Header: /u/src/lib/tools/RCS/regexpr.c,v 1.4 92/03/29 16:52:04 ylo Exp $
28
29 */
30
31 #include <stdio.h>
32 #include <assert.h>
33 #include "regexpr.h"
34
35 char *malloc();
36 void free();
37 char *realloc();
38
39 #define MACRO_BEGIN do {
40 #define MACRO_END } while (0)
41
42 enum regexp_compiled_ops /* opcodes for compiled regexp */
43 {
44 Cend, /* end of pattern reached */
45 Cbol, /* beginning of line */
46 Ceol, /* end of line */
47 Cset, /* character set. Followed by 32 bytes of set. */
48 Cexact, /* followed by a byte to match */
49 Canychar, /* matches any character except newline */
50 Cstart_memory, /* set register start addr (followed by reg number) */
51 Cend_memory, /* set register end addr (followed by reg number) */
52 Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/
53 Cjump, /* followed by two bytes (lsb,msb) of displacement. */
54 Cstar_jump, /* will change to jump/update_failure_jump at runtime */
55 Cfailure_jump, /* jump to addr on failure */
56 Cupdate_failure_jump, /* update topmost failure point and jump */
57 Cdummy_failure_jump, /* push a dummy failure point and jump */
58 Cbegbuf, /* match at beginning of buffer */
59 Cendbuf, /* match at end of buffer */
60 Cwordbeg, /* match at beginning of word */
61 Cwordend, /* match at end of word */
62 Cwordbound, /* match if at word boundary */
63 Cnotwordbound, /* match if not at word boundary */
64 #ifdef emacs
65 Cemacs_at_dot, /* emacs only: matches at dot */
66 #endif /* emacs */
67 Csyntaxspec, /* matches syntax code (1 byte follows) */
68 Cnotsyntaxspec /* matches if syntax code does not match (1 byte foll)*/
69 };
70
71 enum regexp_syntax_op /* syntax codes for plain and quoted characters */
72 {
73 Rend, /* special code for end of regexp */
74 Rnormal, /* normal character */
75 Ranychar, /* any character except newline */
76 Rquote, /* the quote character */
77 Rbol, /* match beginning of line */
78 Reol, /* match end of line */
79 Roptional, /* match preceding expression optionally */
80 Rstar, /* match preceding expr zero or more times */
81 Rplus, /* match preceding expr one or more times */
82 Ror, /* match either of alternatives */
83 Ropenpar, /* opening parenthesis */
84 Rclosepar, /* closing parenthesis */
85 Rmemory, /* match memory register */
86 Rextended_memory, /* \vnn to match registers 10-99 */
87 Ropenset, /* open set. Internal syntax hard-coded below. */
88 /* the following are gnu extensions to "normal" regexp syntax */
89 Rbegbuf, /* beginning of buffer */
90 Rendbuf, /* end of buffer */
91 Rwordchar, /* word character */
92 Rnotwordchar, /* not word character */
93 Rwordbeg, /* beginning of word */
94 Rwordend, /* end of word */
95 Rwordbound, /* word bound */
96 Rnotwordbound, /* not word bound */
97 #ifdef emacs
98 Remacs_at_dot, /* emacs: at dot */
99 Remacs_syntaxspec, /* syntaxspec */
100 Remacs_notsyntaxspec, /* notsyntaxspec */
101 #endif /* emacs */
102 Rnum_ops
103 };
104
105 static int re_compile_initialized = 0;
106 static int regexp_syntax = 0;
107 static unsigned char regexp_plain_ops[256];
108 static unsigned char regexp_quoted_ops[256];
109 static unsigned char regexp_precedences[Rnum_ops];
110 static int regexp_context_indep_ops;
111 static int regexp_ansi_sequences;
112
113 #define NUM_LEVELS 5 /* number of precedence levels in use */
114 #define MAX_NESTING 100 /* max nesting level of operators */
115
116 #ifdef emacs
117
118 /* This code is for emacs compatibility only. */
119
120 #include "config.h"
121 #include "lisp.h"
122 #include "buffer.h"
123 #include "syntax.h"
124
125 /* emacs defines NULL in some strange way? */
126 #undef NULL
127 #define NULL 0
128
129 #else /* emacs */
130
131 #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
132 #define Sword 1
133
134 /* 94/01/19 (cjk) made re_syntax_table unsigned */
135 #ifdef SYNTAX_TABLE
136 unsigned char *re_syntax_table;
137 #else
138 static unsigned char re_syntax_table[256];
139 #endif /* SYNTAX_TABLE */
140
141 #endif /* emacs */
142
143 /* 94/01/19 (cjk) forward declaration for memset added */
144 /* CFS - MacOS X Server continues to not declare memset */
145 //#if defined(NeXT) || defined(__APPLE__)
146 // void *memset(void *, unsigned char, unsigned int);
147 //#endif /* defined(NeXT) */
148
149 //MacOS X 1.0.0 finally defines memset - all these years of waiting are over!
150
re_compile_initialize()151 static void re_compile_initialize()
152 {
153 int a;
154
155 #if !defined(emacs) && !defined(SYNTAX_TABLE)
156 static int syntax_table_inited = 0;
157
158 if (!syntax_table_inited)
159 {
160 syntax_table_inited = 1;
161 memset(re_syntax_table, 0, 256);
162 for (a = 'a'; a <= 'z'; a++)
163 re_syntax_table[a] = Sword;
164 for (a = 'A'; a <= 'Z'; a++)
165 re_syntax_table[a] = Sword;
166 for (a = '0'; a <= '9'; a++)
167 re_syntax_table[a] = Sword;
168 }
169 #endif /* !emacs && !SYNTAX_TABLE */
170 re_compile_initialized = 1;
171 for (a = 0; a < 256; a++)
172 {
173 regexp_plain_ops[a] = Rnormal;
174 regexp_quoted_ops[a] = Rnormal;
175 }
176 for (a = '0'; a <= '9'; a++)
177 regexp_quoted_ops[a] = Rmemory;
178 regexp_plain_ops['\134'] = Rquote;
179 if (regexp_syntax & RE_NO_BK_PARENS)
180 {
181 regexp_plain_ops['('] = Ropenpar;
182 regexp_plain_ops[')'] = Rclosepar;
183 }
184 else
185 {
186 regexp_quoted_ops['('] = Ropenpar;
187 regexp_quoted_ops[')'] = Rclosepar;
188 }
189 if (regexp_syntax & RE_NO_BK_VBAR)
190 regexp_plain_ops['\174'] = Ror;
191 else
192 regexp_quoted_ops['\174'] = Ror;
193 regexp_plain_ops['*'] = Rstar;
194 if (regexp_syntax & RE_BK_PLUS_QM)
195 {
196 regexp_quoted_ops['+'] = Rplus;
197 regexp_quoted_ops['?'] = Roptional;
198 }
199 else
200 {
201 regexp_plain_ops['+'] = Rplus;
202 regexp_plain_ops['?'] = Roptional;
203 }
204 if (regexp_syntax & RE_NEWLINE_OR)
205 regexp_plain_ops['\n'] = Ror;
206 regexp_plain_ops['\133'] = Ropenset;
207 regexp_plain_ops['\136'] = Rbol;
208 regexp_plain_ops['$'] = Reol;
209 regexp_plain_ops['.'] = Ranychar;
210 if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
211 {
212 #ifdef emacs
213 regexp_quoted_ops['='] = Remacs_at_dot;
214 regexp_quoted_ops['s'] = Remacs_syntaxspec;
215 regexp_quoted_ops['S'] = Remacs_notsyntaxspec;
216 #endif /* emacs */
217 regexp_quoted_ops['w'] = Rwordchar;
218 regexp_quoted_ops['W'] = Rnotwordchar;
219 regexp_quoted_ops['<'] = Rwordbeg;
220 regexp_quoted_ops['>'] = Rwordend;
221 regexp_quoted_ops['b'] = Rwordbound;
222 regexp_quoted_ops['B'] = Rnotwordbound;
223 regexp_quoted_ops['`'] = Rbegbuf;
224 regexp_quoted_ops['\''] = Rendbuf;
225 }
226 if (regexp_syntax & RE_ANSI_HEX)
227 regexp_quoted_ops['v'] = Rextended_memory;
228 for (a = 0; a < Rnum_ops; a++)
229 regexp_precedences[a] = 4;
230 if (regexp_syntax & RE_TIGHT_VBAR)
231 {
232 regexp_precedences[Ror] = 3;
233 regexp_precedences[Rbol] = 2;
234 regexp_precedences[Reol] = 2;
235 }
236 else
237 {
238 regexp_precedences[Ror] = 2;
239 regexp_precedences[Rbol] = 3;
240 regexp_precedences[Reol] = 3;
241 }
242 regexp_precedences[Rclosepar] = 1;
243 regexp_precedences[Rend] = 0;
244 regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
245 regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
246 }
247
re_set_syntax(syntax)248 int re_set_syntax(syntax)
249 int syntax;
250 {
251 int ret;
252
253 ret = regexp_syntax;
254 regexp_syntax = syntax;
255 re_compile_initialize();
256 return ret;
257 }
258
hex_char_to_decimal(ch)259 static int hex_char_to_decimal(ch)
260 int ch;
261 {
262 if (ch >= '0' && ch <= '9')
263 return ch - '0';
264 if (ch >= 'a' && ch <= 'f')
265 return ch - 'a' + 10;
266 if (ch >= 'A' && ch <= 'F')
267 return ch - 'A' + 10;
268 return 16;
269 }
270
re_compile_pattern(regex,size,bufp)271 char *re_compile_pattern(regex, size, bufp)
272 char *regex;
273 int size;
274 regexp_t bufp;
275 {
276 int a, pos, op, current_level, level, opcode;
277 int pattern_offset=0, alloc; /* 94/01/19 (cjk) pattern_offset initialized */
278 int starts[NUM_LEVELS * MAX_NESTING], starts_base;
279 int future_jumps[MAX_NESTING], num_jumps;
280 unsigned char ch=0; /* 94/01/19 (cjk) ch initialized */
281 char *pattern, *translate;
282 int next_register, paren_depth, num_open_registers, open_registers[RE_NREGS];
283 int beginning_context;
284
285 #define NEXTCHAR(var) \
286 MACRO_BEGIN \
287 if (pos >= size) \
288 goto ends_prematurely; \
289 (var) = regex[pos]; \
290 pos++; \
291 MACRO_END
292
293 #define ALLOC(amount) \
294 MACRO_BEGIN \
295 if (pattern_offset+(amount) > alloc) \
296 { \
297 alloc += 256 + (amount); \
298 pattern = realloc(pattern, alloc); \
299 if (!pattern) \
300 goto out_of_memory; \
301 } \
302 MACRO_END
303
304 #define STORE(ch) pattern[pattern_offset++] = (ch)
305
306 #define CURRENT_LEVEL_START (starts[starts_base + current_level])
307
308 #define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
309
310 #define PUSH_LEVEL_STARTS if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
311 starts_base += NUM_LEVELS; \
312 else \
313 goto too_complex
314
315 #define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
316
317 #define PUT_ADDR(offset,addr) \
318 MACRO_BEGIN \
319 int disp = (addr) - (offset) - 2; \
320 pattern[(offset)] = disp & 0xff; \
321 pattern[(offset)+1] = (disp>>8) & 0xff; \
322 MACRO_END
323
324 #define INSERT_JUMP(pos,type,addr) \
325 MACRO_BEGIN \
326 int a, p = (pos), t = (type), ad = (addr); \
327 for (a = pattern_offset - 1; a >= p; a--) \
328 pattern[a + 3] = pattern[a]; \
329 pattern[p] = t; \
330 PUT_ADDR(p+1,ad); \
331 pattern_offset += 3; \
332 MACRO_END
333
334 #define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
335
336 #define SET_FIELDS \
337 MACRO_BEGIN \
338 bufp->allocated = alloc; \
339 bufp->buffer = pattern; \
340 bufp->used = pattern_offset; \
341 MACRO_END
342
343 #define GETHEX(var) \
344 MACRO_BEGIN \
345 char gethex_ch, gethex_value; \
346 NEXTCHAR(gethex_ch); \
347 gethex_value = hex_char_to_decimal(gethex_ch); \
348 if (gethex_value == 16) \
349 goto hex_error; \
350 NEXTCHAR(gethex_ch); \
351 gethex_ch = hex_char_to_decimal(gethex_ch); \
352 if (gethex_ch == 16) \
353 goto hex_error; \
354 (var) = gethex_value * 16 + gethex_ch; \
355 MACRO_END
356
357 #define ANSI_TRANSLATE(ch) \
358 MACRO_BEGIN \
359 switch (ch) \
360 { \
361 case 'a': \
362 case 'A': \
363 ch = 7; /* audible bell */ \
364 break; \
365 case 'b': \
366 case 'B': \
367 ch = 8; /* backspace */ \
368 break; \
369 case 'f': \
370 case 'F': \
371 ch = 12; /* form feed */ \
372 break; \
373 case 'n': \
374 case 'N': \
375 ch = 10; /* line feed */ \
376 break; \
377 case 'r': \
378 case 'R': \
379 ch = 13; /* carriage return */ \
380 break; \
381 case 't': \
382 case 'T': \
383 ch = 9; /* tab */ \
384 break; \
385 case 'v': \
386 case 'V': \
387 ch = 11; /* vertical tab */ \
388 break; \
389 case 'x': /* hex code */ \
390 case 'X': \
391 GETHEX(ch); \
392 break; \
393 default: \
394 /* other characters passed through */ \
395 if (translate) \
396 ch = translate[(unsigned char)ch]; \
397 break; \
398 } \
399 MACRO_END
400
401 if (!re_compile_initialized)
402 re_compile_initialize();
403 bufp->used = 0;
404 bufp->fastmap_accurate = 0;
405 bufp->uses_registers = 0;
406 translate = bufp->translate;
407 pattern = bufp->buffer;
408 alloc = bufp->allocated;
409 if (alloc == 0 || pattern == NULL)
410 {
411 alloc = 256;
412 pattern = malloc(alloc);
413 if (!pattern)
414 goto out_of_memory;
415 }
416 pattern_offset = 0;
417 starts_base = 0;
418 num_jumps = 0;
419 current_level = 0;
420 SET_LEVEL_START;
421 num_open_registers = 0;
422 next_register = 1;
423 paren_depth = 0;
424 beginning_context = 1;
425 op = -1;
426 /* we use Rend dummy to ensure that pending jumps are updated (due to
427 low priority of Rend) before exiting the loop. */
428 pos = 0;
429 while (op != Rend)
430 {
431 if (pos >= size)
432 op = Rend;
433 else
434 {
435 NEXTCHAR(ch);
436 if (translate)
437 ch = translate[(unsigned char)ch];
438 op = regexp_plain_ops[(unsigned char)ch];
439 if (op == Rquote)
440 {
441 NEXTCHAR(ch);
442 op = regexp_quoted_ops[(unsigned char)ch];
443 if (op == Rnormal && regexp_ansi_sequences)
444 ANSI_TRANSLATE(ch);
445 }
446 }
447 level = regexp_precedences[op];
448 /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n",
449 ch, op, level, current_level, CURRENT_LEVEL_START); */
450 if (level > current_level)
451 {
452 for (current_level++; current_level < level; current_level++)
453 SET_LEVEL_START;
454 SET_LEVEL_START;
455 }
456 else
457 if (level < current_level)
458 {
459 current_level = level;
460 for (;num_jumps > 0 &&
461 future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
462 num_jumps--)
463 PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
464 }
465 switch (op)
466 {
467 case Rend:
468 break;
469 case Rnormal:
470 normal_char:
471 opcode = Cexact;
472 store_opcode_and_arg: /* opcode & ch must be set */
473 SET_LEVEL_START;
474 ALLOC(2);
475 STORE(opcode);
476 STORE(ch);
477 break;
478 case Ranychar:
479 opcode = Canychar;
480 store_opcode:
481 SET_LEVEL_START;
482 ALLOC(1);
483 STORE(opcode);
484 break;
485 case Rquote:
486 abort();
487 /*NOTREACHED*/
488 case Rbol:
489 if (!beginning_context)
490 if (regexp_context_indep_ops)
491 goto op_error;
492 else
493 goto normal_char;
494 opcode = Cbol;
495 goto store_opcode;
496 case Reol:
497 if (!((pos >= size) ||
498 ((regexp_syntax & RE_NO_BK_VBAR) ?
499 (regex[pos] == '\174') :
500 (pos+1 < size && regex[pos] == '\134' &&
501 regex[pos+1] == '\174')) ||
502 ((regexp_syntax & RE_NO_BK_PARENS)?
503 (regex[pos] == ')'):
504 (pos+1 < size && regex[pos] == '\134' &&
505 regex[pos+1] == ')'))))
506 if (regexp_context_indep_ops)
507 goto op_error;
508 else
509 goto normal_char;
510 opcode = Ceol;
511 goto store_opcode;
512 break;
513 case Roptional:
514 if (beginning_context)
515 if (regexp_context_indep_ops)
516 goto op_error;
517 else
518 goto normal_char;
519 if (CURRENT_LEVEL_START == pattern_offset)
520 break; /* ignore empty patterns for ? */
521 ALLOC(3);
522 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
523 pattern_offset + 3);
524 break;
525 case Rstar:
526 case Rplus:
527 if (beginning_context)
528 if (regexp_context_indep_ops)
529 goto op_error;
530 else
531 goto normal_char;
532 if (CURRENT_LEVEL_START == pattern_offset)
533 break; /* ignore empty patterns for + and * */
534 ALLOC(9);
535 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
536 pattern_offset + 6);
537 INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
538 if (op == Rplus) /* jump over initial failure_jump */
539 INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
540 CURRENT_LEVEL_START + 6);
541 break;
542 case Ror:
543 ALLOC(6);
544 INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
545 pattern_offset + 6);
546 if (num_jumps >= MAX_NESTING)
547 goto too_complex;
548 STORE(Cjump);
549 future_jumps[num_jumps++] = pattern_offset;
550 STORE(0);
551 STORE(0);
552 SET_LEVEL_START;
553 break;
554 case Ropenpar:
555 SET_LEVEL_START;
556 if (next_register < RE_NREGS)
557 {
558 bufp->uses_registers = 1;
559 ALLOC(2);
560 STORE(Cstart_memory);
561 STORE(next_register);
562 open_registers[num_open_registers++] = next_register;
563 next_register++;
564 }
565 paren_depth++;
566 PUSH_LEVEL_STARTS;
567 current_level = 0;
568 SET_LEVEL_START;
569 break;
570 case Rclosepar:
571 if (paren_depth <= 0)
572 goto parenthesis_error;
573 POP_LEVEL_STARTS;
574 current_level = regexp_precedences[Ropenpar];
575 paren_depth--;
576 if (paren_depth < num_open_registers)
577 {
578 bufp->uses_registers = 1;
579 ALLOC(2);
580 STORE(Cend_memory);
581 num_open_registers--;
582 STORE(open_registers[num_open_registers]);
583 }
584 break;
585 case Rmemory:
586 if (ch == '0')
587 goto bad_match_register;
588 assert(ch >= '0' && ch <= '9');
589 bufp->uses_registers = 1;
590 opcode = Cmatch_memory;
591 ch -= '0';
592 goto store_opcode_and_arg;
593 case Rextended_memory:
594 NEXTCHAR(ch);
595 if (ch < '0' || ch > '9')
596 goto bad_match_register;
597 NEXTCHAR(a);
598 if (a < '0' || a > '9')
599 goto bad_match_register;
600 ch = 10 * (a - '0') + ch - '0';
601 if (ch <= 0 || ch >= RE_NREGS)
602 goto bad_match_register;
603 bufp->uses_registers = 1;
604 opcode = Cmatch_memory;
605 goto store_opcode_and_arg;
606 case Ropenset:
607 {
608 int complement,prev,offset,range,firstchar;
609
610 SET_LEVEL_START;
611 ALLOC(1+256/8);
612 STORE(Cset);
613 offset = pattern_offset;
614 for (a = 0; a < 256/8; a++)
615 STORE(0);
616 NEXTCHAR(ch);
617 if (translate)
618 ch = translate[(unsigned char)ch];
619 if (ch == '\136')
620 {
621 complement = 1;
622 NEXTCHAR(ch);
623 if (translate)
624 ch = translate[(unsigned char)ch];
625 }
626 else
627 complement = 0;
628 prev = -1;
629 range = 0;
630 firstchar = 1;
631 while (ch != '\135' || firstchar)
632 {
633 firstchar = 0;
634 if (regexp_ansi_sequences && ch == '\134')
635 {
636 NEXTCHAR(ch);
637 ANSI_TRANSLATE(ch);
638 }
639 if (range)
640 {
641 for (a = prev; a <= ch; a++)
642 SETBIT(pattern, offset, a);
643 prev = -1;
644 range = 0;
645 }
646 else
647 if (prev != -1 && ch == '-')
648 range = 1;
649 else
650 {
651 SETBIT(pattern, offset, ch);
652 prev = ch;
653 }
654 NEXTCHAR(ch);
655 if (translate)
656 ch = translate[(unsigned char)ch];
657 }
658 if (range)
659 SETBIT(pattern, offset, '-');
660 if (complement)
661 {
662 for (a = 0; a < 256/8; a++)
663 pattern[offset+a] ^= 0xff;
664 }
665 break;
666 }
667 case Rbegbuf:
668 opcode = Cbegbuf;
669 goto store_opcode;
670 case Rendbuf:
671 opcode = Cendbuf;
672 goto store_opcode;
673 case Rwordchar:
674 opcode = Csyntaxspec;
675 ch = Sword;
676 goto store_opcode_and_arg;
677 case Rnotwordchar:
678 opcode = Cnotsyntaxspec;
679 ch = Sword;
680 goto store_opcode_and_arg;
681 case Rwordbeg:
682 opcode = Cwordbeg;
683 goto store_opcode;
684 case Rwordend:
685 opcode = Cwordend;
686 goto store_opcode;
687 case Rwordbound:
688 opcode = Cwordbound;
689 goto store_opcode;
690 case Rnotwordbound:
691 opcode = Cnotwordbound;
692 goto store_opcode;
693 #ifdef emacs
694 case Remacs_at_dot:
695 opcode = Cemacs_at_dot;
696 goto store_opcode;
697 case Remacs_syntaxspec:
698 NEXTCHAR(ch);
699 if (translate)
700 ch = translate[(unsigned char)ch];
701 opcode = Csyntaxspec;
702 ch = syntax_spec_code[(unsigned char)ch];
703 goto store_opcode_and_arg;
704 case Remacs_notsyntaxspec:
705 NEXTCHAR(ch);
706 if (translate)
707 ch = translate[(unsigned char)ch];
708 opcode = Cnotsyntaxspec;
709 ch = syntax_spec_code[(unsigned char)ch];
710 goto store_opcode_and_arg;
711 #endif /* emacs */
712 default:
713 abort();
714 }
715 beginning_context = (op == Ropenpar || op == Ror);
716 }
717 if (starts_base != 0)
718 goto parenthesis_error;
719 assert(num_jumps == 0);
720 ALLOC(1);
721 STORE(Cend);
722 SET_FIELDS;
723 return NULL;
724
725 op_error:
726 SET_FIELDS;
727 return "Badly placed special character";
728
729 bad_match_register:
730 SET_FIELDS;
731 return "Bad match register number";
732
733 hex_error:
734 SET_FIELDS;
735 return "Bad hexadecimal number";
736
737 parenthesis_error:
738 SET_FIELDS;
739 return "Badly placed parenthesis";
740
741 out_of_memory:
742 SET_FIELDS;
743 return "Out of memory";
744
745 ends_prematurely:
746 SET_FIELDS;
747 return "Regular expression ends prematurely";
748
749 too_complex:
750 SET_FIELDS;
751 return "Regular expression too complex";
752 }
753 #undef CHARAT
754 #undef NEXTCHAR
755 #undef GETHEX
756 #undef ALLOC
757 #undef STORE
758 #undef CURRENT_LEVEL_START
759 #undef SET_LEVEL_START
760 #undef PUSH_LEVEL_STARTS
761 #undef POP_LEVEL_STARTS
762 #undef PUT_ADDR
763 #undef INSERT_JUMP
764 #undef SETBIT
765 #undef SET_FIELDS
766
re_compile_fastmap_aux(code,pos,visited,can_be_null,fastmap)767 static void re_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)
768 char *code, *visited, *can_be_null, *fastmap;
769 int pos;
770 {
771 int a, b, syntaxcode;
772
773 if (visited[pos])
774 return; /* we have already been here */
775 visited[pos] = 1;
776 for (;;)
777 switch (code[pos++])
778 {
779 case Cend:
780 *can_be_null = 1;
781 return;
782 case Cbol:
783 case Cbegbuf:
784 case Cendbuf:
785 case Cwordbeg:
786 case Cwordend:
787 case Cwordbound:
788 case Cnotwordbound:
789 #ifdef emacs
790 case Cemacs_at_dot:
791 #endif /* emacs */
792 break;
793 case Csyntaxspec:
794 syntaxcode = code[pos++];
795 for (a = 0; a < 256; a++)
796 if (SYNTAX(a) == syntaxcode)
797 fastmap[a] = 1;
798 return;
799 case Cnotsyntaxspec:
800 syntaxcode = code[pos++];
801 for (a = 0; a < 256; a++)
802 if (SYNTAX(a) != syntaxcode)
803 fastmap[a] = 1;
804 return;
805 case Ceol:
806 fastmap['\n'] = 1;
807 if (*can_be_null == 0)
808 *can_be_null = 2; /* can match null, but only at end of buffer*/
809 return;
810 case Cset:
811 for (a = 0; a < 256/8; a++)
812 if (code[pos + a] != 0)
813 for (b = 0; b < 8; b++)
814 if (code[pos + a] & (1 << b))
815 fastmap[(a << 3) + b] = 1;
816 pos += 256/8;
817 return;
818 case Cexact:
819 fastmap[(unsigned char)code[pos]] = 1;
820 return;
821 case Canychar:
822 for (a = 0; a < 256; a++)
823 if (a != '\n')
824 fastmap[a] = 1;
825 return;
826 case Cstart_memory:
827 case Cend_memory:
828 pos++;
829 break;
830 case Cmatch_memory:
831 for (a = 0; a < 256; a++)
832 fastmap[a] = 1;
833 *can_be_null = 1;
834 return;
835 case Cjump:
836 case Cdummy_failure_jump:
837 case Cupdate_failure_jump:
838 case Cstar_jump:
839 a = (unsigned char)code[pos++];
840 a |= (unsigned char)code[pos++] << 8;
841 pos += (int)(short)a;
842 if (visited[pos])
843 {
844 /* argh... the regexp contains empty loops. This is not
845 good, as this may cause a failure stack overflow when
846 matching. Oh well. */
847 /* this path leads nowhere; pursue other paths. */
848 return;
849 }
850 visited[pos] = 1;
851 break;
852 case Cfailure_jump:
853 a = (unsigned char)code[pos++];
854 a |= (unsigned char)code[pos++] << 8;
855 a = pos + (int)(short)a;
856 re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
857 break;
858 default:
859 abort(); /* probably some opcode is missing from this switch */
860 /*NOTREACHED*/
861 }
862 }
863
re_do_compile_fastmap(buffer,used,pos,can_be_null,fastmap)864 static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap)
865 char *buffer, *fastmap, *can_be_null;
866 int used, pos;
867 {
868 char small_visited[512], *visited;
869
870 if (used <= sizeof(small_visited))
871 visited = small_visited;
872 else
873 {
874 visited = malloc(used);
875 if (!visited)
876 return 0;
877 }
878 *can_be_null = 0;
879 memset(fastmap, 0, 256);
880 memset(visited, 0, used);
881 re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
882 if (visited != small_visited)
883 free(visited);
884 return 1;
885 }
886
re_compile_fastmap(bufp)887 void re_compile_fastmap(bufp)
888 regexp_t bufp;
889 {
890 if (!bufp->fastmap || bufp->fastmap_accurate)
891 return;
892 assert(bufp->used > 0);
893 if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null,
894 bufp->fastmap))
895 return;
896 if (bufp->buffer[0] == Cbol)
897 bufp->anchor = 1; /* begline */
898 else
899 if (bufp->buffer[0] == Cbegbuf)
900 bufp->anchor = 2; /* begbuf */
901 else
902 bufp->anchor = 0; /* none */
903 bufp->fastmap_accurate = 1;
904 }
905
906 #define INITIAL_FAILURES 128 /* initial # failure points to allocate */
907 #define MAX_FAILURES 4100 /* max # of failure points before failing */
908
re_match_pattern_2(bufp,string1,size1,string2,size2,pos,regs,mstop)909 int re_match_pattern_2(bufp, string1, size1, string2, size2, pos, regs, mstop)
910 regexp_t bufp;
911 char *string1, *string2;
912 int size1, size2, pos, mstop;
913 regexp_registers_t regs;
914 {
915 struct failure_point { char *text, *partend, *code; }
916 *failure_stack_start, *failure_sp, *failure_stack_end,
917 initial_failure_stack[INITIAL_FAILURES];
918 char *code, *translate, *text, *textend, *partend, *part_2_end;
919 char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS];
920 char *regend_text[RE_NREGS], *regend_partend[RE_NREGS];
921 int a, b, ch, reg, regch, match_end;
922 char *regtext, *regpartend, *regtextend;
923
924 #define PREFETCH \
925 MACRO_BEGIN \
926 if (text == partend) \
927 { \
928 if (text == textend) \
929 goto fail; \
930 text = string2; \
931 partend = part_2_end; \
932 } \
933 MACRO_END
934
935 #define NEXTCHAR(var) \
936 MACRO_BEGIN \
937 PREFETCH; \
938 (var) = (unsigned char)*text++; \
939 if (translate) \
940 (var) = (unsigned char)translate[(var)]; \
941 MACRO_END
942
943 assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0);
944 assert(mstop <= size1 + size2);
945 assert(pos <= mstop);
946
947 if (pos <= size1)
948 {
949 text = string1 + pos;
950 if (mstop <= size1)
951 {
952 partend = string1 + mstop;
953 textend = partend;
954 }
955 else
956 {
957 partend = string1 + size1;
958 textend = string2 + mstop - size1;
959 }
960 part_2_end = string2 + mstop - size1;
961 }
962 else
963 {
964 text = string2 + pos - size1;
965 partend = string2 + mstop - size1;
966 textend = partend;
967 part_2_end = partend;
968 }
969
970 if (bufp->uses_registers && regs != NULL)
971 for (a = 0; a < RE_NREGS; a++)
972 regend_text[a] = NULL;
973
974 code = bufp->buffer;
975 translate = bufp->translate;
976 failure_stack_start = failure_sp = initial_failure_stack;
977 failure_stack_end = initial_failure_stack + INITIAL_FAILURES;
978
979 #if 0
980 /* re_search_pattern_2 has already done this, and otherwise we get little benefit
981 from this. So I'll leave this out. */
982 if (bufp->fastmap_accurate && !bufp->can_be_null &&
983 text != textend &&
984 !bufp->fastmap[translate ?
985 (unsigned char)translate[(unsigned char)*text] :
986 (unsigned char)*text])
987 return -1; /* it can't possibly match */
988 #endif
989
990 continue_matching:
991 for (;;)
992 {
993 switch (*code++)
994 {
995 case Cend:
996 if (partend != part_2_end)
997 match_end = text - string1;
998 else
999 match_end = text - string2 + size1;
1000 if (regs)
1001 {
1002 regs->start[0] = pos;
1003 regs->end[0] = match_end;
1004 if (!bufp->uses_registers)
1005 {
1006 for (a = 1; a < RE_NREGS; a++)
1007 {
1008 regs->start[a] = -1;
1009 regs->end[a] = -1;
1010 }
1011 }
1012 else
1013 {
1014 for (a = 1; a < RE_NREGS; a++)
1015 {
1016 if (regend_text[a] == NULL)
1017 {
1018 regs->start[a] = -1;
1019 regs->end[a] = -1;
1020 continue;
1021 }
1022 if (regstart_partend[a] != part_2_end)
1023 regs->start[a] = regstart_text[a] - string1;
1024 else
1025 regs->start[a] = regstart_text[a] - string2 + size1;
1026 if (regend_partend[a] != part_2_end)
1027 regs->end[a] = regend_text[a] - string1;
1028 else
1029 regs->end[a] = regend_text[a] - string2 + size1;
1030 }
1031 }
1032 }
1033 if (failure_stack_start != initial_failure_stack)
1034 free((char *)failure_stack_start);
1035 return match_end - pos;
1036 case Cbol:
1037 if (text == string1 || text[-1] == '\n') /* text[-1] always valid */
1038 break;
1039 goto fail;
1040 case Ceol:
1041 if (text == string2 + size2 ||
1042 (text == string1 + size1 ?
1043 (size2 == 0 || *string2 == '\n') :
1044 *text == '\n'))
1045 break;
1046 goto fail;
1047 case Cset:
1048 NEXTCHAR(ch);
1049 if (code[ch/8] & (1<<(ch & 7)))
1050 {
1051 code += 256/8;
1052 break;
1053 }
1054 goto fail;
1055 case Cexact:
1056 NEXTCHAR(ch);
1057 if (ch != (unsigned char)*code++)
1058 goto fail;
1059 break;
1060 case Canychar:
1061 NEXTCHAR(ch);
1062 if (ch == '\n')
1063 goto fail;
1064 break;
1065 case Cstart_memory:
1066 reg = *code++;
1067 regstart_text[reg] = text;
1068 regstart_partend[reg] = partend;
1069 break;
1070 case Cend_memory:
1071 reg = *code++;
1072 regend_text[reg] = text;
1073 regend_partend[reg] = partend;
1074 break;
1075 case Cmatch_memory:
1076 reg = *code++;
1077 if (regend_text[reg] == NULL)
1078 goto fail; /* or should we just match nothing? */
1079 regtext = regstart_text[reg];
1080 regtextend = regend_text[reg];
1081 if (regstart_partend[reg] == regend_partend[reg])
1082 regpartend = regtextend;
1083 else
1084 regpartend = string1 + size1;
1085
1086 for (;regtext != regtextend;)
1087 {
1088 NEXTCHAR(ch);
1089 if (regtext == regpartend)
1090 regtext = string2;
1091 regch = (unsigned char)*regtext++;
1092 if (translate)
1093 regch = (unsigned char)translate[regch];
1094 if (regch != ch)
1095 goto fail;
1096 }
1097 break;
1098 case Cstar_jump:
1099 /* star is coded as:
1100 1: failure_jump 2
1101 ... code for operand of star
1102 star_jump 1
1103 2: ... code after star
1104 We change the star_jump to update_failure_jump if we can determine
1105 that it is safe to do so; otherwise we change it to an ordinary
1106 jump.
1107 plus is coded as
1108 jump 2
1109 1: failure_jump 3
1110 2: ... code for operand of plus
1111 star_jump 1
1112 3: ... code after plus
1113 For star_jump considerations this is processed identically
1114 to star. */
1115 a = (unsigned char)*code++;
1116 a |= (unsigned char)*code++ << 8;
1117 a = (int)(short)a;
1118 {
1119 char map[256], can_be_null;
1120 char *p1, *p2;
1121
1122 p1 = code + a + 3; /* skip the failure_jump */
1123 assert(p1[-3] == Cfailure_jump);
1124 p2 = code;
1125 /* p1 points inside loop, p2 points to after loop */
1126 if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
1127 p2 - bufp->buffer, &can_be_null, map))
1128 goto make_normal_jump;
1129 /* If we might introduce a new update point inside the loop,
1130 we can't optimize because then update_jump would update a
1131 wrong failure point. Thus we have to be quite careful here. */
1132 loop_p1:
1133 /* loop until we find something that consumes a character */
1134 switch (*p1++)
1135 {
1136 case Cbol:
1137 case Ceol:
1138 case Cbegbuf:
1139 case Cendbuf:
1140 case Cwordbeg:
1141 case Cwordend:
1142 case Cwordbound:
1143 case Cnotwordbound:
1144 #ifdef emacs
1145 case Cemacs_at_dot:
1146 #endif /* emacs */
1147 goto loop_p1;
1148 case Cstart_memory:
1149 case Cend_memory:
1150 p1++;
1151 goto loop_p1;
1152 case Cexact:
1153 ch = (unsigned char)*p1++;
1154 if (map[ch])
1155 goto make_normal_jump;
1156 break;
1157 case Canychar:
1158 for (b = 0; b < 256; b++)
1159 if (b != '\n' && map[b])
1160 goto make_normal_jump;
1161 break;
1162 case Cset:
1163 for (b = 0; b < 256; b++)
1164 if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
1165 goto make_normal_jump;
1166 p1 += 256/8;
1167 break;
1168 default:
1169 goto make_normal_jump;
1170 }
1171 /* now we know that we can't backtrack. */
1172 while (p1 != p2 - 3)
1173 {
1174 switch (*p1++)
1175 {
1176 case Cend:
1177 abort(); /* we certainly shouldn't get this inside loop */
1178 /*NOTREACHED*/
1179 case Cbol:
1180 case Ceol:
1181 case Canychar:
1182 case Cbegbuf:
1183 case Cendbuf:
1184 case Cwordbeg:
1185 case Cwordend:
1186 case Cwordbound:
1187 case Cnotwordbound:
1188 #ifdef emacs
1189 case Cemacs_at_dot:
1190 #endif /* emacs */
1191 break;
1192 case Cset:
1193 p1 += 256/8;
1194 break;
1195 case Cexact:
1196 case Cstart_memory:
1197 case Cend_memory:
1198 case Cmatch_memory:
1199 case Csyntaxspec:
1200 case Cnotsyntaxspec:
1201 p1++;
1202 break;
1203 case Cjump:
1204 case Cstar_jump:
1205 case Cfailure_jump:
1206 case Cupdate_failure_jump:
1207 case Cdummy_failure_jump:
1208 goto make_normal_jump;
1209 default:
1210 printf("regexpr.c: processing star_jump: unknown op %d\n", p1[-1]);
1211 break;
1212 }
1213 }
1214 goto make_update_jump;
1215 }
1216 make_normal_jump:
1217 /* printf("changing to normal jump\n"); */
1218 code -= 3;
1219 *code = Cjump;
1220 break;
1221 make_update_jump:
1222 /* printf("changing to update jump\n"); */
1223 code -= 2;
1224 a += 3; /* jump to after the Cfailure_jump */
1225 code[-1] = Cupdate_failure_jump;
1226 code[0] = a & 0xff;
1227 code[1] = a >> 8;
1228 /* fall to next case */
1229 case Cupdate_failure_jump:
1230 failure_sp[-1].text = text;
1231 failure_sp[-1].partend = partend;
1232 /* fall to next case */
1233 case Cjump:
1234 a = (unsigned char)*code++;
1235 a |= (unsigned char)*code++ << 8;
1236 code += (int)(short)a;
1237 break;
1238 case Cdummy_failure_jump:
1239 case Cfailure_jump:
1240 if (failure_sp == failure_stack_end)
1241 {
1242 if (failure_stack_start != initial_failure_stack)
1243 goto error;
1244 failure_stack_start = (struct failure_point *)
1245 malloc(MAX_FAILURES * sizeof(*failure_stack_start));
1246 failure_stack_end = failure_stack_start + MAX_FAILURES;
1247 memcpy((char *)failure_stack_start, (char *)initial_failure_stack,
1248 INITIAL_FAILURES * sizeof(*failure_stack_start));
1249 failure_sp = failure_stack_start + INITIAL_FAILURES;
1250 }
1251 a = (unsigned char)*code++;
1252 a |= (unsigned char)*code++ << 8;
1253 a = (int)(short)a;
1254 if (code[-3] == Cdummy_failure_jump)
1255 { /* this is only used in plus */
1256 assert(*code == Cfailure_jump);
1257 b = (unsigned char)code[1];
1258 b |= (unsigned char)code[2] << 8;
1259 failure_sp->code = code + (int)(short)b + 3;
1260 failure_sp->text = NULL;
1261 code += a;
1262 }
1263 else
1264 {
1265 failure_sp->code = code + a;
1266 failure_sp->text = text;
1267 failure_sp->partend = partend;
1268 }
1269 failure_sp++;
1270 break;
1271 case Cbegbuf:
1272 if (text == string1)
1273 break;
1274 goto fail;
1275 case Cendbuf:
1276 if (size2 == 0 ? text == string1 + size1 : text == string2 + size2)
1277 break;
1278 goto fail;
1279 case Cwordbeg:
1280 if (text == string2 + size2)
1281 goto fail;
1282 if (size2 == 0 && text == string1 + size1)
1283 goto fail;
1284 if (SYNTAX(text == string1 + size1 ? *string1 : *text) != Sword)
1285 goto fail;
1286 if (text == string1)
1287 break;
1288 if (SYNTAX(text[-1]) != Sword)
1289 break;
1290 goto fail;
1291 case Cwordend:
1292 if (text == string1)
1293 goto fail;
1294 if (SYNTAX(text[-1]) != Sword)
1295 goto fail;
1296 if (text == string2 + size2)
1297 break;
1298 if (size2 == 0 && text == string1 + size1)
1299 break;
1300 if (SYNTAX(*text) == Sword)
1301 goto fail;
1302 break;
1303 case Cwordbound:
1304 /* Note: as in gnu regexp, this also matches at the beginning
1305 and end of buffer. */
1306 if (text == string1 || text == string2 + size2 ||
1307 (size2 == 0 && text == string1 + size1))
1308 break;
1309 if ((SYNTAX(text[-1]) == Sword) ^
1310 (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword))
1311 break;
1312 goto fail;
1313 case Cnotwordbound:
1314 /* Note: as in gnu regexp, this never matches at the beginning
1315 and end of buffer. */
1316 if (text == string1 || text == string2 + size2 ||
1317 (size2 == 0 && text == string1 + size1))
1318 goto fail;
1319 if (!((SYNTAX(text[-1]) == Sword) ^
1320 (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword)))
1321 goto fail;
1322 break;
1323 case Csyntaxspec:
1324 NEXTCHAR(ch);
1325 if (SYNTAX(ch) != (unsigned char)*code++)
1326 goto fail;
1327 break;
1328 case Cnotsyntaxspec:
1329 NEXTCHAR(ch);
1330 if (SYNTAX(ch) != (unsigned char)*code++)
1331 break;
1332 goto fail;
1333 #ifdef emacs
1334 case Cemacs_at_dot:
1335 if (PTR_CHAR_POS((unsigned char *)text) + 1 != point)
1336 goto fail;
1337 break;
1338 #endif /* emacs */
1339 default:
1340 abort();
1341 /*NOTREACHED*/
1342 }
1343 }
1344 abort();
1345 /*NOTREACHED*/
1346
1347 fail:
1348 if (failure_sp != failure_stack_start)
1349 {
1350 failure_sp--;
1351 text = failure_sp->text;
1352 if (text == NULL)
1353 goto fail;
1354 partend = failure_sp->partend;
1355 code = failure_sp->code;
1356 goto continue_matching;
1357 }
1358 if (failure_stack_start != initial_failure_stack)
1359 free((char *)failure_stack_start);
1360 return -1;
1361
1362 error:
1363 if (failure_stack_start != initial_failure_stack)
1364 free((char *)failure_stack_start);
1365 return -2;
1366 }
1367
1368 #undef PREFETCH
1369 #undef NEXTCHAR
1370 #undef PUSH_FAILURE
1371
re_match_pattern(bufp,string,size,pos,regs)1372 int re_match_pattern(bufp, string, size, pos, regs)
1373 regexp_t bufp;
1374 char *string;
1375 int size, pos;
1376 regexp_registers_t regs;
1377 {
1378 return re_match_pattern_2(bufp, string, size, (char *)NULL, 0, pos, regs, size);
1379 }
1380
re_search_pattern_2(bufp,string1,size1,string2,size2,pos,range,regs,mstop)1381 int re_search_pattern_2(bufp, string1, size1, string2, size2, pos, range, regs,
1382 mstop)
1383 regexp_t bufp;
1384 char *string1, *string2;
1385 int size1, size2, pos, range, mstop;
1386 regexp_registers_t regs;
1387 {
1388 char *fastmap, *translate, *text, *partstart, *partend;
1389 int dir, ret;
1390 char anchor;
1391
1392 assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0);
1393 assert(pos + range >= 0 && pos + range <= size1 + size2);
1394 assert(pos <= mstop);
1395
1396 fastmap = bufp->fastmap;
1397 translate = bufp->translate;
1398 if (fastmap && !bufp->fastmap_accurate)
1399 re_compile_fastmap(bufp);
1400 anchor = bufp->anchor;
1401 if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
1402 fastmap = NULL;
1403 if (range < 0)
1404 {
1405 dir = -1;
1406 range = -range;
1407 }
1408 else
1409 dir = 1;
1410 if (anchor == 2)
1411 if (pos != 0)
1412 return -1;
1413 else
1414 range = 0;
1415 for (; range >= 0; range--, pos += dir)
1416 {
1417 if (fastmap)
1418 {
1419 if (dir == 1)
1420 { /* searching forwards */
1421 if (pos < size1)
1422 {
1423 text = string1 + pos;
1424 if (pos + range > size1)
1425 partend = string1 + size1;
1426 else
1427 partend = string1 + pos + range;
1428 }
1429 else
1430 {
1431 text = string2 + pos - size1;
1432 partend = string2 + pos + range - size1;
1433 }
1434 partstart = text;
1435 if (translate)
1436 while (text != partend &&
1437 !fastmap[(unsigned char)
1438 translate[(unsigned char)*text]])
1439 text++;
1440 else
1441 while (text != partend && !fastmap[(unsigned char)*text])
1442 text++;
1443 pos += text - partstart;
1444 range -= text - partstart;
1445 if (pos == size1 + size2 && bufp->can_be_null == 0)
1446 return -1;
1447 }
1448 else
1449 { /* searching backwards */
1450 if (pos <= size1)
1451 {
1452 text = string1 + pos;
1453 partstart = string1 + pos - range;
1454 }
1455 else
1456 {
1457 text = string2 + pos - size1;
1458 if (range < pos - size1)
1459 partstart = string2 + pos - size1 - range;
1460 else
1461 partstart = string2;
1462 }
1463 partend = text;
1464 if (translate)
1465 while (text != partstart &&
1466 !fastmap[(unsigned char)
1467 translate[(unsigned char)*text]])
1468 text--;
1469 else
1470 while (text != partstart &&
1471 !fastmap[(unsigned char)*text])
1472 text--;
1473 pos -= partend - text;
1474 range -= partend - text;
1475 }
1476 }
1477 if (anchor == 1)
1478 { /* anchored to begline */
1479 if (pos > 0 &&
1480 (pos <= size1 ? string1[pos - 1] :
1481 string2[pos - size1 - 1]) != '\n')
1482 continue;
1483 }
1484 assert(pos >= 0 && pos <= size1 + size2);
1485 ret = re_match_pattern_2(bufp, string1, size1, string2, size2, pos, regs, mstop);
1486 if (ret >= 0)
1487 return pos;
1488 if (ret == -2)
1489 return -2;
1490 }
1491 return -1;
1492 }
1493
re_search_pattern(bufp,string,size,startpos,range,regs)1494 int re_search_pattern(bufp, string, size, startpos, range, regs)
1495 regexp_t bufp;
1496 char *string;
1497 int size, startpos, range;
1498 regexp_registers_t regs;
1499 {
1500 return re_search_pattern_2(bufp, string, size, (char *)NULL, 0,
1501 startpos, range, regs, size);
1502 }
1503
1504 /* 94/01/19 (cjk) hide these on NeXT */
1505 #if !defined(NeXT)
1506 static struct re_pattern_buffer re_comp_buf;
1507
re_comp(s)1508 char *re_comp(s)
1509 char *s;
1510 {
1511 if (s == NULL)
1512 {
1513 if (!re_comp_buf.buffer)
1514 return "Out of memory";
1515 return NULL;
1516 }
1517 if (!re_comp_buf.buffer)
1518 {
1519 /* the buffer will be allocated automatically */
1520 re_comp_buf.fastmap = malloc(256);
1521 re_comp_buf.translate = NULL;
1522 }
1523 return re_compile_pattern(s, strlen(s), &re_comp_buf);
1524 }
1525
re_exec(s)1526 int re_exec(s)
1527 char *s;
1528 {
1529 int len = strlen(s);
1530
1531 return re_search_pattern(&re_comp_buf, s, len, 0, len, (regexp_registers_t)NULL) >= 0;
1532 }
1533 #endif /* !defined(NeXT) */
1534
1535 #ifdef TEST_REGEXP
1536
main()1537 int main()
1538 {
1539 char buf[500];
1540 char *cp;
1541 struct re_pattern_buffer exp;
1542 struct re_registers regs;
1543 int a,pos;
1544 char fastmap[256];
1545
1546 exp.allocated = 0;
1547 exp.buffer = 0;
1548 exp.translate = NULL;
1549 exp.fastmap = fastmap;
1550
1551 /* re_set_syntax(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_ANSI_HEX); */
1552
1553 while (1)
1554 {
1555 printf("Enter regexp:\n");
1556 gets(buf);
1557 cp=re_compile_pattern(buf, strlen(buf), &exp);
1558 if (cp)
1559 {
1560 printf("Error: %s\n", cp);
1561 continue;
1562 }
1563 re_compile_fastmap(&exp);
1564 printf("dump:\n");
1565 for (pos = 0; pos < exp.used;)
1566 {
1567 printf("%d: ", pos);
1568 switch (exp.buffer[pos++])
1569 {
1570 case Cend:
1571 strcpy(buf, "end");
1572 break;
1573 case Cbol:
1574 strcpy(buf, "bol");
1575 break;
1576 case Ceol:
1577 strcpy(buf, "eol");
1578 break;
1579 case Cset:
1580 strcpy(buf, "set ");
1581 for (a = 0; a < 256/8; a++)
1582 sprintf(buf+strlen(buf)," %02x",
1583 (unsigned char)exp.buffer[pos++]);
1584 break;
1585 case Cexact:
1586 sprintf(buf, "exact '%c' 0x%x", exp.buffer[pos],
1587 (unsigned char)exp.buffer[pos]);
1588 pos++;
1589 break;
1590 case Canychar:
1591 strcpy(buf, "anychar");
1592 break;
1593 case Cstart_memory:
1594 sprintf(buf, "start_memory %d", exp.buffer[pos++]);
1595 break;
1596 case Cend_memory:
1597 sprintf(buf, "end_memory %d", exp.buffer[pos++]);
1598 break;
1599 case Cmatch_memory:
1600 sprintf(buf, "match_memory %d", exp.buffer[pos++]);
1601 break;
1602 case Cjump:
1603 case Cdummy_failure_jump:
1604 case Cstar_jump:
1605 case Cfailure_jump:
1606 case Cupdate_failure_jump:
1607 a = (unsigned char)exp.buffer[pos++];
1608 a += (unsigned char)exp.buffer[pos++] << 8;
1609 a = (int)(short)a;
1610 switch (exp.buffer[pos-3])
1611 {
1612 case Cjump:
1613 cp = "jump";
1614 break;
1615 case Cstar_jump:
1616 cp = "star_jump";
1617 break;
1618 case Cfailure_jump:
1619 cp = "failure_jump";
1620 break;
1621 case Cupdate_failure_jump:
1622 cp = "update_failure_jump";
1623 break;
1624 case Cdummy_failure_jump:
1625 cp = "dummy_failure_jump";
1626 break;
1627 default:
1628 cp = "unknown jump";
1629 break;
1630 }
1631 sprintf(buf, "%s %d", cp, a + pos);
1632 break;
1633 case Cbegbuf:
1634 strcpy(buf,"begbuf");
1635 break;
1636 case Cendbuf:
1637 strcpy(buf,"endbuf");
1638 break;
1639 case Cwordbeg:
1640 strcpy(buf,"wordbeg");
1641 break;
1642 case Cwordend:
1643 strcpy(buf,"wordend");
1644 break;
1645 case Cwordbound:
1646 strcpy(buf,"wordbound");
1647 break;
1648 case Cnotwordbound:
1649 strcpy(buf,"notwordbound");
1650 break;
1651 default:
1652 sprintf(buf, "unknown code %d",
1653 (unsigned char)exp.buffer[pos - 1]);
1654 break;
1655 }
1656 printf("%s\n", buf);
1657 }
1658 printf("can_be_null = %d uses_registers = %d anchor = %d\n",
1659 exp.can_be_null, exp.uses_registers, exp.anchor);
1660
1661 printf("fastmap:");
1662 for (a = 0; a < 256; a++)
1663 if (exp.fastmap[a])
1664 printf(" %d", a);
1665 printf("\n");
1666 printf("Enter strings. An empty line terminates.\n");
1667 while (fgets(buf, sizeof(buf), stdin))
1668 {
1669 if (buf[0] == '\n')
1670 break;
1671 a = re_search_pattern(&exp, buf, strlen(buf), 0, strlen(buf), ®s);
1672 printf("search returns %d\n", a);
1673 if (a != -1)
1674 {
1675 for (a = 0; a < RE_NREGS; a++)
1676 {
1677 printf("buf %d: %d to %d\n", a, regs.start[a], regs.end[a]);
1678 }
1679 }
1680 }
1681 }
1682 }
1683
1684 #endif /* TEST_REGEXP */
1685