1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
4  *
5  * This is NOT the original regular expression code as written by Henry
6  * Spencer.  This code has been modified specifically for use with Vim, and
7  * should not be used apart from compiling Vim.  If you want a good regular
8  * expression library, get the original code.
9  *
10  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
11  */
12 
13 #ifndef _REGEXP_H
14 #define _REGEXP_H
15 
/*
 * The number of sub-matches is limited to 10.
 * The first one (index 0) is the whole match, referenced with "\0".
 * The second one (index 1) is the first sub-match, referenced with "\1".
 * This goes up to the tenth (index 9), referenced with "\9".
 *
 * NSUBEXP is the element count of the per-match arrays in regmatch_T,
 * regmmatch_T and reg_extmatch_T.
 */
#define NSUBEXP  10
23 
/*
 * In the NFA engine: how many braces are allowed.
 * TODO(RE): Use dynamic memory allocation instead of this static limit.
 */
#define NFA_MAX_BRACES 20
29 
/*
 * In the NFA engine: how many states are allowed.
 */
#define NFA_MAX_STATES 100000

/*
 * Negative sentinel value.  Parenthesized so that the macro always expands to
 * a single operand, whatever expression it is used in.
 * NOTE(review): presumably signals that the NFA engine would be too costly
 * for a pattern -- confirm against the NFA engine source.
 */
#define NFA_TOO_EXPENSIVE (-1)
35 
// Which regexp engine to use?  Needed for vim_regcomp().
// The values must match those of the 'regexpengine' option.
#define AUTOMATIC_ENGINE	0
#define BACKTRACKING_ENGINE	1
#define NFA_ENGINE		2
41 
// Forward declaration; struct regengine is defined at the end of this header.
typedef struct regengine regengine_T;

/*
 * Structure returned by vim_regcomp() to pass on to vim_regexec().
 * This is the general structure. For the actual matcher, two specific
 * structures are used. See code below.
 *
 * bt_regprog_T and nfa_regprog_T both start with these same five members, in
 * this order.  When a member is added, removed or reordered here, the same
 * change must be made in both of those structures.
 */
typedef struct regprog
{
    regengine_T		*engine;
    unsigned		regflags;
    unsigned		re_engine;   // automatic, backtracking or nfa engine
    unsigned		re_flags;    // second argument for vim_regcomp()
    int			re_in_use;   // prog is being executed
} regprog_T;
57 
58 /*
59  * Structure used by the back track matcher.
60  * These fields are only to be used in regexp.c!
61  * See regexp.c for an explanation.
62  */
63 typedef struct
64 {
65     // These four members implement regprog_T
66     regengine_T		*engine;
67     unsigned		regflags;
68     unsigned		re_engine;
69     unsigned		re_flags;
70     int			re_in_use;
71 
72     int			regstart;
73     char_u		reganch;
74     char_u		*regmust;
75     int			regmlen;
76 #ifdef FEAT_SYN_HL
77     char_u		reghasz;
78 #endif
79     char_u		program[1];	// actually longer..
80 } bt_regprog_T;
81 
/*
 * Structure representing a NFA state.
 * An NFA state may have no outgoing edge, when it is a NFA_MATCH state.
 * A state has at most two outgoing edges: "out" and "out1".
 */
typedef struct nfa_state nfa_state_T;
struct nfa_state
{
    int			c;
    nfa_state_T		*out;
    nfa_state_T		*out1;
    int			id;
    int			lastlist[2]; // 0: normal, 1: recursive
    int			val;
};
96 
97 /*
98  * Structure used by the NFA matcher.
99  */
100 typedef struct
101 {
102     // These three members implement regprog_T
103     regengine_T		*engine;
104     unsigned		regflags;
105     unsigned		re_engine;
106     unsigned		re_flags;
107     int			re_in_use;
108 
109     nfa_state_T		*start;		// points into state[]
110 
111     int			reganch;	// pattern starts with ^
112     int			regstart;	// char at start of pattern
113     char_u		*match_text;	// plain text to match with
114 
115     int			has_zend;	// pattern contains \ze
116     int			has_backref;	// pattern contains \1 .. \9
117 #ifdef FEAT_SYN_HL
118     int			reghasz;
119 #endif
120     char_u		*pattern;
121     int			nsubexp;	// number of ()
122     int			nstate;
123     nfa_state_T		state[1];	// actually longer..
124 } nfa_regprog_T;
125 
126 /*
127  * Structure to be used for single-line matching.
128  * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
129  * When there is no match, the pointer is NULL.
130  */
131 typedef struct
132 {
133     regprog_T		*regprog;
134     char_u		*startp[NSUBEXP];
135     char_u		*endp[NSUBEXP];
136     int			rm_ic;
137 } regmatch_T;
138 
139 /*
140  * Structure to be used for multi-line matching.
141  * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
142  * and ends in line "endpos[no].lnum" just before column "endpos[no].col".
143  * The line numbers are relative to the first line, thus startpos[0].lnum is
144  * always 0.
145  * When there is no match, the line number is -1.
146  */
147 typedef struct
148 {
149     regprog_T		*regprog;
150     lpos_T		startpos[NSUBEXP];
151     lpos_T		endpos[NSUBEXP];
152     int			rmm_ic;
153     colnr_T		rmm_maxcol;	// when not zero: maximum column
154 } regmmatch_T;
155 
156 /*
157  * Structure used to store external references: "\z\(\)" to "\z\1".
158  * Use a reference count to avoid the need to copy this around.  When it goes
159  * from 1 to zero the matches need to be freed.
160  */
161 typedef struct
162 {
163     short		refcnt;
164     char_u		*matches[NSUBEXP];
165 } reg_extmatch_T;
166 
167 struct regengine
168 {
169     regprog_T	*(*regcomp)(char_u*, int);
170     void	(*regfree)(regprog_T *);
171     int		(*regexec_nl)(regmatch_T *, char_u *, colnr_T, int);
172     long	(*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, proftime_T *, int *);
173     char_u	*expr;
174 };
175 
176 #endif	// _REGEXP_H
177