1 /*
2  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
3  *
4  * This is NOT the original regular expression code as written by Henry
5  * Spencer.  This code has been modified specifically for use with Vim, and
6  * should not be used apart from compiling Vim.  If you want a good regular
7  * expression library, get the original code.
8  *
9  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
10  */
11 
12 #ifndef NVIM_REGEXP_DEFS_H
13 #define NVIM_REGEXP_DEFS_H
14 
15 #include <stdbool.h>
16 
17 #include "nvim/pos.h"
18 #include "nvim/profile.h"
19 #include "nvim/types.h"
20 
/*
 * The number of sub-matches is limited to 10.
 * The first one (index 0) is the whole match, referenced with "\0".
 * The second one (index 1) is the first sub-match, referenced with "\1".
 * This goes up to the tenth (index 9), referenced with "\9".
 *
 * NSUBEXP is the length of the per-sub-match arrays declared in this header
 * (startpos[]/endpos[], startp[]/endp[] and matches[]).
 */
#define NSUBEXP  10
28 
/*
 * In the NFA engine: how many braces are allowed.
 * TODO(RE): Use dynamic memory allocation instead of a fixed (static) limit
 * such as this one.
 */
#define NFA_MAX_BRACES 20
34 
// In the NFA engine: how many states are allowed.
#define NFA_MAX_STATES 100000
// Negative sentinel value defined alongside the state limit.
// NOTE(review): presumably reported when the NFA engine considers a pattern
// too costly to handle -- confirm against its users.
// The replacement list is parenthesized so that the macro always expands to a
// single operand; without the parentheses "x NFA_TOO_EXPENSIVE" would silently
// compile as a subtraction.
#define NFA_TOO_EXPENSIVE (-1)
38 
// Which regexp engine to use? Needed for vim_regcomp().
// The numeric values must match with 'regexpengine', so do not renumber them:
//   0  pick the engine automatically
//   1  backtracking engine
//   2  NFA engine
#define AUTOMATIC_ENGINE    0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE          2
44 
// Forward declarations of the struct typedefs. The structures are defined
// further down in this header; declaring the names first allows them to be
// used as pointer targets before their definition (e.g. the regprog_T pointer
// in regmmatch_T).
typedef struct regengine regengine_T;
typedef struct regprog regprog_T;
typedef struct reg_extmatch reg_extmatch_T;
48 
/// Structure to be used for multi-line matching.
/// Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
/// and ends in line "endpos[no].lnum" just before column "endpos[no].col".
/// The line numbers are relative to the first line, thus startpos[0].lnum is
/// always 0.
/// When there is no match, the line number is -1.
/// Both position arrays hold NSUBEXP entries, one per sub-match index.
typedef struct {
  regprog_T *regprog;
  lpos_T startpos[NSUBEXP];
  lpos_T endpos[NSUBEXP];
  // NOTE(review): presumably the ignore-case flag for this match -- confirm.
  int rmm_ic;
  colnr_T rmm_maxcol;  ///< when not zero: maximum column
} regmmatch_T;
62 
63 #include "nvim/buffer_defs.h"
64 
/*
 * Structure returned by vim_regcomp() to pass on to vim_regexec().
 * This is the general structure. For the actual matcher, two specific
 * structures are used: bt_regprog_T and nfa_regprog_T. Both begin with the
 * same members, of the same types and in the same order, as this structure.
 * See code below.
 */
struct regprog {
  regengine_T *engine;
  unsigned regflags;
  unsigned re_engine;  ///< Automatic, backtracking or NFA engine.
  unsigned re_flags;   ///< Second argument for vim_regcomp().
  bool re_in_use;      ///< prog is being executed
};
77 
/*
 * Structure used by the back track matcher.
 * These fields are only to be used in regexp.c!
 * See regexp.c for an explanation.
 */
typedef struct {
  // These five members implement regprog_T: they repeat the members of
  // struct regprog with the same types and in the same order.
  // NOTE(review): presumably this lets the structure be used through a
  // regprog_T pointer; keep both declarations in sync -- confirm.
  regengine_T *engine;
  unsigned regflags;
  unsigned re_engine;
  unsigned re_flags;
  bool re_in_use;

  // Matcher-specific members; their meaning is explained in regexp.c.
  int regstart;
  char_u reganch;
  char_u *regmust;
  int regmlen;
  char_u reghasz;
  // Declared with one element only; the real length is larger.
  char_u program[1];                    // actually longer..
} bt_regprog_T;
98 
/// One state of the NFA.
/// A state that is a NFA_MATCH state may have no outgoing edge.
typedef struct nfa_state {
  int c;
  struct nfa_state *out;
  struct nfa_state *out1;
  int id;
  int lastlist[2];  ///< index 0: normal, index 1: recursive
  int val;
} nfa_state_T;
110 
/*
 * Structure used by the NFA matcher.
 */
typedef struct {
  // These five members implement regprog_T: they repeat the members of
  // struct regprog with the same types and in the same order.
  // NOTE(review): presumably this lets the structure be used through a
  // regprog_T pointer; keep both declarations in sync -- confirm.
  regengine_T *engine;
  unsigned regflags;
  unsigned re_engine;
  unsigned re_flags;
  bool re_in_use;

  nfa_state_T *start;           // points into state[]

  int reganch;                          // pattern starts with ^
  int regstart;                         // char at start of pattern
  char_u *match_text;      // plain text to match with

  int has_zend;                         // pattern contains \ze
  int has_backref;                      // pattern contains \1 .. \9
  int reghasz;
  char_u *pattern;
  int nsubexp;                          // number of ()
  // NOTE(review): presumably the number of entries in state[] -- confirm.
  int nstate;
  // Declared with one element only; the real length is larger.
  nfa_state_T state[1];                 // actually longer..
} nfa_regprog_T;
136 
/*
 * Structure to be used for single-line matching.
 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
 * When there is no match, the pointer is NULL.
 * Both pointer arrays hold NSUBEXP entries, one per sub-match index.
 */
typedef struct {
  regprog_T *regprog;
  char_u *startp[NSUBEXP];
  char_u *endp[NSUBEXP];
  // NOTE(review): presumably the ignore-case flag for this match -- confirm.
  bool rm_ic;
} regmatch_T;
148 
/*
 * Structure used to store external references: "\z\(\)" to "\z\1".
 * Use a reference count to avoid the need to copy this around.  When it goes
 * from 1 to zero the matches need to be freed.
 */
struct reg_extmatch {
  int16_t refcnt;            // reference count described above
  char_u *matches[NSUBEXP];  // one entry per sub-match index
};
158 
// Table of function pointers describing one regexp engine. Every regprog
// structure refers to such a table through its "engine" member.
// NOTE(review): the argument meanings below are inferred from the types and
// member names only -- confirm against the engine implementations.
struct regengine {
  // Returns a regprog_T pointer; presumably takes the pattern and the
  // compile flags.
  regprog_T *(*regcomp)(char_u *, int);
  // Takes a regprog_T pointer; presumably releases that program.
  void (*regfree)(regprog_T *);
  // Takes a regmatch_T, the structure used for single-line matching.
  int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, bool);
  // Takes a regmmatch_T, the structure used for multi-line matching, plus a
  // window, a buffer, a line number and a column.
  long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T,
                        proftime_T *, int *);
  char_u *expr;
};
167 
168 #endif  // NVIM_REGEXP_DEFS_H
169