1 /********************************************
2 rexp.h
3 copyright 2008-2014,2016, Thomas E. Dickey
4 copyright 2010, Jonathan Nieder
5 copyright 1991,2014, Michael D. Brennan
6 
7 This is a source file for mawk, an implementation of
8 the AWK programming language.
9 
10 Mawk is distributed without warranty under the terms of
11 the GNU General Public License, version 2, 1991.
12 ********************************************/
13 
14 /*
15  * $MawkId: rexp.h,v 1.28 2016/09/30 19:02:09 tom Exp $
16  */
17 
18 #ifndef  REXP_H
19 #define  REXP_H
20 
21 #include "nstd.h"
22 #include "types.h"
23 #include <stdio.h>
24 #include  <setjmp.h>
25 
26 extern PTR RE_malloc(size_t);
27 extern PTR RE_realloc(void *, size_t);
28 
29 #ifdef NO_LEAKS
30 extern void RE_free(void *);
31 #else
32 #define RE_free(p) free(p)
33 #endif
34 
35 /*  finite machine  state types  */
36 
37 #define  M_STR     	0	/* matching a literal string */
38 #define  M_CLASS   	1	/* character class */
39 #define  M_ANY     	2	/* arbitrary character (.) */
40 #define  M_START   	3	/* start of string (^) */
41 #define  M_END     	4	/* end of string ($) */
42 #define  M_U       	5	/* arbitrary string (.*) */
43 #define  M_1J      	6	/* mandatory jump */
44 #define  M_2JA     	7	/* optional (undesirable) jump */
45 #define  M_2JB     	8	/* optional (desirable) jump */
46 #define  M_SAVE_POS	9	/* push position onto stack */
47 #define  M_2JC     	10	/* pop pos'n, optional jump if advanced */
48 #define  M_ACCEPT  	11	/* end of match */
49 #define  U_ON      	12
50 
51 #define  U_OFF     0
52 #define  END_OFF   0
53 #define  END_ON    (2*U_ON)
54 
55 typedef UChar BV[32];		/* bit vector */
56 
57 typedef struct {
58     SType s_type;
59     SLen s_len;			/* used for M_STR  */
60     union {
61 	char *str;		/* string */
62 	BV *bvp;		/*  class  */
63 	int jump;
64     } s_data;
65 } STATE;
66 
67 #define  STATESZ  (sizeof(STATE))
68 
69 typedef struct {
70     STATE *start, *stop;
71 } MACHINE;
72 
73 /*  tokens   */
74 #define  T_NONE   0		/* no token */
75 #define  T_OR     1		/* | */
76 #define  T_CAT    2		/* binary operator */
77 #define  T_STAR   3		/* * */
78 #define  T_PLUS   4		/* + */
79 #define  T_Q      5		/* ? */
80 #define  T_LP     6		/* ( */
81 #define  T_RP     7		/* ) */
82 #define  T_START  8		/* ^ */
83 #define  T_END    9		/* $ */
84 #define  T_ANY   10		/* . */
85 #define  T_CLASS 11		/* starts with [ */
86 #define  T_SLASH 12		/*  \  */
87 #define  T_CHAR  13		/* all the rest */
88 #define  T_STR   14		/* string built of other tokens */
89 #define  T_U     15
90 
91 /*  precedences and error codes  */
92 #define  L   0
93 #define  EQ  1
94 #define  G   2
95 #define  E1  (-1)
96 #define  E2  (-2)
97 #define  E3  (-3)
98 #define  E4  (-4)
99 #define  E5  (-5)
100 #define  E6  (-6)
101 #define  E7  (-7)
102 
103 #define  MEMORY_FAILURE      5
104 
105 #define  ison(b,x)  ((b)[((UChar)(x)) >> 3] & (1 << ((x) & 7)))
106 
107 /* struct for the run time stack */
108 typedef struct {
109     STATE *m;			/* save the machine ptr */
110     int u;			/* save the u_flag */
111     char *s;			/* save the active string ptr */
112     int sp;			/* size of position stack */
113     int tp;			/* offset to top entry of position stack */
114     char *ss;			/* save the match start -- only used by REmatch */
115 } RT_STATE;			/* run time state */
116 
117 /* entry for the position stack */
118 typedef struct {
119     /* if we have not advanced beyond this character,
120      * do not bother trying another round.
121      */
122     const char *pos;
123 
124     /* run time stack frame responsible for removing this node */
125     int owner;
126     /* previous node is this - this->prev_offset.  See RE_pos_pop() */
127     int prev_offset;
128 } RT_POS_ENTRY;
129 
130 /*  error  trap   */
131 extern int REerrno;
132 void RE_error_trap(int);
133 
134 #ifndef GCC_NORETURN
135 #define GCC_NORETURN		/* nothing */
136 #endif
137 
138 extern MACHINE RE_u(void);
139 extern MACHINE RE_start(void);
140 extern MACHINE RE_end(void);
141 extern MACHINE RE_any(void);
142 extern MACHINE RE_str(char *, size_t);
143 extern MACHINE RE_class(BV *);
144 extern void RE_cat(MACHINE *, MACHINE *);
145 extern void RE_or(MACHINE *, MACHINE *);
146 extern void RE_close(MACHINE *);
147 extern void RE_poscl(MACHINE *);
148 extern void RE_01(MACHINE *);
149 extern void RE_panic(const char *) GCC_NORETURN;
150 
151 #ifndef MAWK_H
152 extern char *str_str(char *, size_t, char *, size_t);
153 #endif
154 
155 extern void RE_lex_init(char *, size_t);
156 extern int RE_lex(MACHINE *);
157 extern void RE_run_stack_init(void);
158 extern void RE_pos_stack_init(void);
159 extern RT_STATE *RE_new_run_stack(void);
160 extern RT_POS_ENTRY *RE_new_pos_stack(void);
161 
162 extern RT_STATE *RE_run_stack_base;
163 extern RT_STATE *RE_run_stack_limit;
164 extern RT_STATE *RE_run_stack_empty;
165 
166 extern RT_POS_ENTRY *RE_pos_stack_base;
167 extern RT_POS_ENTRY *RE_pos_stack_limit;
168 extern RT_POS_ENTRY *RE_pos_stack_empty;
169 
170 #ifdef LOCAL_REGEXP
171 static /* inline */ RT_POS_ENTRY *
RE_pos_push(RT_POS_ENTRY * head,const RT_STATE * owner,const char * s)172 RE_pos_push(RT_POS_ENTRY * head, const RT_STATE * owner, const char *s)
173 {
174     head->pos = s;
175     head->owner = (int) (owner - RE_run_stack_base);
176 
177     if (++head == RE_pos_stack_limit) {
178 	head = RE_new_pos_stack();
179     }
180     head->prev_offset = 1;
181     return head;
182 }
183 
184 static /* inline */ const char *
RE_pos_pop(RT_POS_ENTRY ** head,const RT_STATE * current)185 RE_pos_pop(RT_POS_ENTRY ** head, const RT_STATE * current)
186 {
187     RT_POS_ENTRY *prev = *head - (*head)->prev_offset;
188 
189     if (prev->owner == current - RE_run_stack_base) {	/* likely */
190 	/* no need to preserve intervening nodes */
191 	*head = prev;
192     } else if (*head == prev) {
193 	RE_panic("unbalanced M_SAVE_POS and M_2JC");
194     } else {
195 	(*head)->prev_offset += prev->prev_offset;
196     }
197 
198     return prev->pos;
199 }
200 #endif /* LOCAL_REGEXP */
201 
202 #endif /* REXP_H  */
203