1 /********************************************
2 rexp.h
3 copyright 2008-2014,2016, Thomas E. Dickey
4 copyright 2010, Jonathan Nieder
5 copyright 1991,2014, Michael D. Brennan
6
7 This is a source file for mawk, an implementation of
8 the AWK programming language.
9
10 Mawk is distributed without warranty under the terms of
11 the GNU General Public License, version 2, 1991.
12 ********************************************/
13
14 /*
15 * $MawkId: rexp.h,v 1.28 2016/09/30 19:02:09 tom Exp $
16 */
17
18 #ifndef REXP_H
19 #define REXP_H
20
21 #include "nstd.h"
22 #include "types.h"
23 #include <stdio.h>
24 #include <setjmp.h>
25
26 extern PTR RE_malloc(size_t);
27 extern PTR RE_realloc(void *, size_t);
28
29 #ifdef NO_LEAKS
30 extern void RE_free(void *);
31 #else
32 #define RE_free(p) free(p)
33 #endif
34
35 /* finite machine state types */
36
37 #define M_STR 0 /* matching a literal string */
38 #define M_CLASS 1 /* character class */
39 #define M_ANY 2 /* arbitrary character (.) */
40 #define M_START 3 /* start of string (^) */
41 #define M_END 4 /* end of string ($) */
42 #define M_U 5 /* arbitrary string (.*) */
43 #define M_1J 6 /* mandatory jump */
44 #define M_2JA 7 /* optional (undesirable) jump */
45 #define M_2JB 8 /* optional (desirable) jump */
46 #define M_SAVE_POS 9 /* push position onto stack */
47 #define M_2JC 10 /* pop pos'n, optional jump if advanced */
48 #define M_ACCEPT 11 /* end of match */
49 #define U_ON 12
50
51 #define U_OFF 0
52 #define END_OFF 0
53 #define END_ON (2*U_ON)
54
55 typedef UChar BV[32]; /* bit vector */
56
57 typedef struct {
58 SType s_type;
59 SLen s_len; /* used for M_STR */
60 union {
61 char *str; /* string */
62 BV *bvp; /* class */
63 int jump;
64 } s_data;
65 } STATE;
66
67 #define STATESZ (sizeof(STATE))
68
69 typedef struct {
70 STATE *start, *stop;
71 } MACHINE;
72
73 /* tokens */
74 #define T_NONE 0 /* no token */
75 #define T_OR 1 /* | */
76 #define T_CAT 2 /* binary operator */
77 #define T_STAR 3 /* * */
78 #define T_PLUS 4 /* + */
79 #define T_Q 5 /* ? */
80 #define T_LP 6 /* ( */
81 #define T_RP 7 /* ) */
82 #define T_START 8 /* ^ */
83 #define T_END 9 /* $ */
84 #define T_ANY 10 /* . */
85 #define T_CLASS 11 /* starts with [ */
86 #define T_SLASH 12 /* \ */
87 #define T_CHAR 13 /* all the rest */
88 #define T_STR 14 /* string built of other tokens */
89 #define T_U 15
90
91 /* precedences and error codes */
92 #define L 0
93 #define EQ 1
94 #define G 2
95 #define E1 (-1)
96 #define E2 (-2)
97 #define E3 (-3)
98 #define E4 (-4)
99 #define E5 (-5)
100 #define E6 (-6)
101 #define E7 (-7)
102
103 #define MEMORY_FAILURE 5
104
105 #define ison(b,x) ((b)[((UChar)(x)) >> 3] & (1 << ((x) & 7)))
106
107 /* struct for the run time stack */
108 typedef struct {
109 STATE *m; /* save the machine ptr */
110 int u; /* save the u_flag */
111 char *s; /* save the active string ptr */
112 int sp; /* size of position stack */
113 int tp; /* offset to top entry of position stack */
114 char *ss; /* save the match start -- only used by REmatch */
115 } RT_STATE; /* run time state */
116
117 /* entry for the position stack */
118 typedef struct {
119 /* if we have not advanced beyond this character,
120 * do not bother trying another round.
121 */
122 const char *pos;
123
124 /* run time stack frame responsible for removing this node */
125 int owner;
126 /* previous node is this - this->prev_offset. See RE_pos_pop() */
127 int prev_offset;
128 } RT_POS_ENTRY;
129
130 /* error trap */
131 extern int REerrno;
132 void RE_error_trap(int);
133
134 #ifndef GCC_NORETURN
135 #define GCC_NORETURN /* nothing */
136 #endif
137
138 extern MACHINE RE_u(void);
139 extern MACHINE RE_start(void);
140 extern MACHINE RE_end(void);
141 extern MACHINE RE_any(void);
142 extern MACHINE RE_str(char *, size_t);
143 extern MACHINE RE_class(BV *);
144 extern void RE_cat(MACHINE *, MACHINE *);
145 extern void RE_or(MACHINE *, MACHINE *);
146 extern void RE_close(MACHINE *);
147 extern void RE_poscl(MACHINE *);
148 extern void RE_01(MACHINE *);
149 extern void RE_panic(const char *) GCC_NORETURN;
150
151 #ifndef MAWK_H
152 extern char *str_str(char *, size_t, char *, size_t);
153 #endif
154
155 extern void RE_lex_init(char *, size_t);
156 extern int RE_lex(MACHINE *);
157 extern void RE_run_stack_init(void);
158 extern void RE_pos_stack_init(void);
159 extern RT_STATE *RE_new_run_stack(void);
160 extern RT_POS_ENTRY *RE_new_pos_stack(void);
161
162 extern RT_STATE *RE_run_stack_base;
163 extern RT_STATE *RE_run_stack_limit;
164 extern RT_STATE *RE_run_stack_empty;
165
166 extern RT_POS_ENTRY *RE_pos_stack_base;
167 extern RT_POS_ENTRY *RE_pos_stack_limit;
168 extern RT_POS_ENTRY *RE_pos_stack_empty;
169
170 #ifdef LOCAL_REGEXP
171 static /* inline */ RT_POS_ENTRY *
RE_pos_push(RT_POS_ENTRY * head,const RT_STATE * owner,const char * s)172 RE_pos_push(RT_POS_ENTRY * head, const RT_STATE * owner, const char *s)
173 {
174 head->pos = s;
175 head->owner = (int) (owner - RE_run_stack_base);
176
177 if (++head == RE_pos_stack_limit) {
178 head = RE_new_pos_stack();
179 }
180 head->prev_offset = 1;
181 return head;
182 }
183
184 static /* inline */ const char *
RE_pos_pop(RT_POS_ENTRY ** head,const RT_STATE * current)185 RE_pos_pop(RT_POS_ENTRY ** head, const RT_STATE * current)
186 {
187 RT_POS_ENTRY *prev = *head - (*head)->prev_offset;
188
189 if (prev->owner == current - RE_run_stack_base) { /* likely */
190 /* no need to preserve intervening nodes */
191 *head = prev;
192 } else if (*head == prev) {
193 RE_panic("unbalanced M_SAVE_POS and M_2JC");
194 } else {
195 (*head)->prev_offset += prev->prev_offset;
196 }
197
198 return prev->pos;
199 }
200 #endif /* LOCAL_REGEXP */
201
202 #endif /* REXP_H */
203