1
2 /********************************************
3 rexp.c
4 copyright 1991, Michael D. Brennan
5
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12
13 /*$Log: rexp.c,v $
14 * Revision 3.4 1991/08/13 09:09:59 brennan
15 * VERSION .9994
16 *
17 * Revision 3.3 91/08/04 15:45:03 brennan
18 * no longer attempt to recover mem on failed REcompile
19 * Its not worth the effort
20 *
21 * Revision 3.2 91/08/03 07:24:06 brennan
22 * check for empty machine stack (missing operand) wasn't quite right
23 *
24 * Revision 3.1 91/06/07 10:33:16 brennan
25 * VERSION 0.995
26 *
27 * Revision 1.7 91/06/05 08:58:47 brennan
28 * change RE_free to free
29 *
30 * Revision 1.6 91/06/03 07:07:17 brennan
31 * moved parser stacks inside REcompile
32 * removed unnecessary copying
33 *
34 */
35
36 /* op precedence parser for regular expressions */
37
38 #include "rexp.h"
39
40
41 /* DATA */
42 int REerrno ;
43 char *REerrlist[] = { (char *) 0 ,
44 /* 1 */ "missing '('",
45 /* 2 */ "missing ')'",
46 /* 3 */ "bad class -- [], [^] or [" ,
47 /* 4 */ "missing operand" ,
48 /* 5 */ "resource exhaustion -- regular expression too large"
49 } ;
50 /* E5 is very unlikely to occur */
51
52 /* This table drives the operator precedence parser */
53 static short table[8][8] = {
54
55 /* 0 | CAT * + ? ( ) */
56 /* 0 */ 0, L, L, L, L, L, L, E1,
57 /* | */ G, G, L, L, L, L, L, G,
58 /* CAT*/ G, G, G, L, L, L, L, G,
59 /* * */ G, G, G, G, G, G, E7, G,
60 /* + */ G, G, G, G, G, G, E7, G,
61 /* ? */ G, G, G, G, G, G, E7, G,
62 /* ( */ E2, L, L, L, L, L, L, EQ,
63 /* ) */ G , G, G, G, G, G, E7, G } ;
64
65
66 #define STACKSZ 64
67
68
69 static jmp_buf err_buf ; /* used to trap on error */
70
RE_error_trap(x)71 void RE_error_trap(x) /* return is dummy to make macro OK */
72 int x ;
73 {
74 REerrno = x ;
75 longjmp(err_buf, 1 ) ;
76 }
77
78
REcompile(re)79 VOID *REcompile(re)
80 char *re ;
81 {
82 MACHINE m_stack[STACKSZ] ;
83 struct op {
84 int token ;
85 int prec ;
86 } op_stack[STACKSZ] ;
87 register MACHINE *m_ptr ;
88 register struct op *op_ptr ;
89 register int t ;
90
91 /* do this first because it also checks if we have a
92 run time stack */
93 RE_lex_init(re) ;
94
95 if ( *re == 0 )
96 { STATE *p = (STATE *) RE_malloc( sizeof(STATE) ) ;
97 p->type = M_ACCEPT ;
98 return (VOID *) p ;
99 }
100
101 if ( setjmp(err_buf) ) return (VOID *) 0 ;
102 /* we used to try to recover memory left on machine stack ;
103 but now m_ptr is in a register so it won't be right unless
104 we force it out of a register which isn't worth the trouble */
105
106 /* initialize the stacks */
107 m_ptr = m_stack - 1 ;
108 op_ptr = op_stack ;
109 op_ptr->token = 0 ;
110
111 t = RE_lex(m_stack) ;
112
113 while( 1 )
114 { switch( t )
115 {
116 case T_STR :
117 case T_ANY :
118 case T_U :
119 case T_START :
120 case T_END :
121 case T_CLASS : m_ptr++ ; break ;
122
123 case 0 : /* end of reg expr */
124 if ( op_ptr -> token == 0 ) /* done */
125 if ( m_ptr == m_stack ) return (VOID *)m_ptr->start ;
126 else
127 /* machines still on the stack */
128 RE_panic("values still on machine stack") ;
129
130 /* otherwise fall thru to default
131 which is operator case */
132
133 default:
134
135 if ( (op_ptr -> prec = table[op_ptr -> token][t]) == G )
136 {
137 do
138 { /* op_pop */
139
140 if ( op_ptr->token <= T_CAT ) /*binary op*/ m_ptr-- ;
141 /* if not enough values on machine stack
142 then we have a missing operand */
143 if ( m_ptr < m_stack ) RE_error_trap(-E4) ;
144
145 switch( op_ptr->token )
146 { case T_CAT : RE_cat(m_ptr, m_ptr+1) ; break ;
147 case T_OR : RE_or( m_ptr, m_ptr+1) ; break ;
148 case T_STAR : RE_close( m_ptr) ; break ;
149 case T_PLUS : RE_poscl( m_ptr ) ; break ;
150 case T_Q : RE_01( m_ptr ) ; break ;
151 default : break ; /*nothing on ( or ) */
152 }
153
154 op_ptr-- ;
155 }
156 while ( op_ptr->prec != L ) ;
157
158 continue ; /* back thru switch at top */
159 }
160
161 if ( op_ptr -> prec < 0 )
162 if ( op_ptr->prec == E7 )
163 RE_panic("parser returns E7") ;
164 else RE_error_trap(-op_ptr->prec) ;
165
166 if ( ++op_ptr == op_stack + STACKSZ ) /* stack overflow */
167 RE_error_trap(-E5) ;
168 op_ptr -> token = t ;
169 } /* end of switch */
170
171 if ( m_ptr == m_stack+(STACKSZ-1) ) /*overflow*/
172 RE_error_trap(-E5) ;
173 t = RE_lex(m_ptr+1) ;
174 }
175 }
176
177
178 /* getting here means a logic flaw or unforeseen case */
RE_panic(s)179 void RE_panic( s )
180 char *s ;
181 { fprintf( stderr, "REcompile() - panic: %s\n", s) ;
182 exit(100) ; }
183
184