xref: /386bsd/usr/src/usr.bin/awk/rexp/rexp.c (revision a2142627)
1 
2 /********************************************
3 rexp.c
4 copyright 1991, Michael D. Brennan
5 
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8 
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12 
13 /*$Log: rexp.c,v $
14  * Revision 3.4  1991/08/13  09:09:59  brennan
15  * VERSION .9994
16  *
17  * Revision 3.3  91/08/04  15:45:03  brennan
18  * no longer attempt to recover mem on failed REcompile
19  * Its not worth the effort
20  *
21  * Revision 3.2  91/08/03  07:24:06  brennan
22  * check for empty machine stack (missing operand) wasn't quite right
23  *
24  * Revision 3.1  91/06/07  10:33:16  brennan
25  * VERSION 0.995
26  *
27  * Revision 1.7  91/06/05  08:58:47  brennan
28  * change RE_free to free
29  *
30  * Revision 1.6  91/06/03  07:07:17  brennan
31  * moved parser stacks inside REcompile
32  * removed unnecessary copying
33  *
34 */
35 
36 /*  op precedence  parser for regular expressions  */
37 
38 #include  "rexp.h"
39 
40 
41 /*  DATA   */
42 int   REerrno ;
43 char *REerrlist[] = { (char *) 0 ,
44 /* 1  */    "missing '('",
45 /* 2  */    "missing ')'",
46 /* 3  */    "bad class -- [], [^] or [" ,
47 /* 4  */    "missing operand" ,
48 /* 5  */    "resource exhaustion -- regular expression too large"
49 } ;
50 /* E5 is very unlikely to occur */
51 
52 /* This table drives the operator precedence parser */
53 static  short  table[8][8]  =  {
54 
55 /*        0   |   CAT   *   +   ?   (   )   */
56 /* 0 */   0,  L,  L,    L,  L,  L,  L,  E1,
57 /* | */   G,  G,  L,    L,  L,  L,  L,  G,
58 /* CAT*/  G,  G,  G,    L,  L,  L,  L,  G,
59 /* * */   G,  G,  G,    G,  G,  G, E7,  G,
60 /* + */   G,  G,  G,    G,  G,  G, E7,  G,
61 /* ? */   G,  G,  G,    G,  G,  G, E7,  G,
62 /* ( */   E2, L,  L,    L,  L,  L,  L,  EQ,
63 /* ) */   G , G,  G,    G,  G,  G,  E7,  G     }   ;
64 
65 
66 #define  STACKSZ   64
67 
68 
69 static jmp_buf  err_buf  ;  /*  used to trap on error */
70 
RE_error_trap(x)71 void  RE_error_trap(x)  /* return is dummy to make macro OK */
72   int x ;
73 {
74   REerrno = x ;
75   longjmp(err_buf, 1 ) ;
76 }
77 
78 
REcompile(re)79 VOID *REcompile(re)
80   char *re ;
81 {
82   MACHINE  m_stack[STACKSZ] ;
83   struct op {
84     int token ;
85     int prec ;
86   }  op_stack[STACKSZ] ;
87   register MACHINE *m_ptr  ;
88   register struct op *op_ptr ;
89   register int  t ;
90 
91   /* do this first because it also checks if we have a
92      run time stack */
93   RE_lex_init(re) ;
94 
95   if ( *re == 0 )
96   { STATE *p = (STATE *) RE_malloc( sizeof(STATE) ) ;
97     p->type = M_ACCEPT ;
98     return  (VOID *) p ;
99   }
100 
101   if ( setjmp(err_buf) )  return (VOID *) 0 ;
102   /* we used to try to recover memory left on machine stack ;
103      but now m_ptr is in a register so it won't be right unless
104      we force it out of a register which isn't worth the trouble */
105 
106   /* initialize the stacks  */
107   m_ptr = m_stack - 1 ;
108   op_ptr = op_stack ;
109   op_ptr->token = 0 ;
110 
111   t = RE_lex(m_stack) ;
112 
113   while( 1 )
114    { switch( t )
115        {
116          case T_STR  :
117          case T_ANY  :
118          case T_U    :
119          case T_START :
120          case T_END :
121          case T_CLASS :   m_ptr++ ; break ;
122 
123          case  0 :   /*  end of reg expr   */
124            if ( op_ptr -> token == 0 )  /*  done   */
125                if ( m_ptr == m_stack )  return (VOID *)m_ptr->start ;
126                else
127                /*  machines still on the stack  */
128                RE_panic("values still on machine stack") ;
129 
130          /*  otherwise  fall  thru to default
131              which is operator case  */
132 
133          default:
134 
135            if ( (op_ptr -> prec = table[op_ptr -> token][t]) == G )
136            {
137              do
138              {  /* op_pop   */
139 
140                 if ( op_ptr->token <= T_CAT ) /*binary op*/ m_ptr-- ;
141 		/* if not enough values on machine stack
142 		   then we have a missing operand */
143                 if ( m_ptr < m_stack )  RE_error_trap(-E4) ;
144 
145                 switch( op_ptr->token )
146                 {  case  T_CAT :  RE_cat(m_ptr, m_ptr+1) ;  break ;
147                    case  T_OR  :  RE_or( m_ptr, m_ptr+1) ;  break ;
148                    case T_STAR  :  RE_close( m_ptr) ;  break ;
149                    case T_PLUS  :  RE_poscl( m_ptr ) ; break ;
150                    case T_Q     :  RE_01( m_ptr ) ;    break ;
151                    default       :  break ; /*nothing on ( or ) */
152                 }
153 
154                 op_ptr-- ;
155             }
156             while ( op_ptr->prec != L ) ;
157 
158             continue ;  /* back thru switch at top */
159           }
160 
161           if ( op_ptr -> prec < 0 )
162               if ( op_ptr->prec == E7 )
163                   RE_panic("parser returns E7") ;
164               else  RE_error_trap(-op_ptr->prec) ;
165 
166           if ( ++op_ptr == op_stack + STACKSZ ) /* stack overflow */
167                  RE_error_trap(-E5) ;
168           op_ptr -> token = t ;
169        } /* end of switch */
170 
171     if ( m_ptr == m_stack+(STACKSZ-1) ) /*overflow*/
172                        RE_error_trap(-E5) ;
173     t = RE_lex(m_ptr+1) ;
174   }
175 }
176 
177 
178 /* getting here means a logic flaw or unforeseen case */
RE_panic(s)179 void RE_panic( s )
180   char *s ;
181 { fprintf( stderr, "REcompile() - panic:  %s\n", s) ;
182   exit(100) ; }
183 
184