1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE 4 * 5 * This is NOT the original regular expression code as written by Henry 6 * Spencer. This code has been modified specifically for use with Vim, and 7 * should not be used apart from compiling Vim. If you want a good regular 8 * expression library, get the original code. 9 * 10 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE 11 */ 12 13 #ifndef _REGEXP_H 14 #define _REGEXP_H 15 16 /* 17 * The number of sub-matches is limited to 10. 18 * The first one (index 0) is the whole match, referenced with "\0". 19 * The second one (index 1) is the first sub-match, referenced with "\1". 20 * This goes up to the tenth (index 9), referenced with "\9". 21 */ 22 #define NSUBEXP 10 23 24 /* 25 * In the NFA engine: how many braces are allowed. 26 * TODO(RE): Use dynamic memory allocation instead of static, like here 27 */ 28 #define NFA_MAX_BRACES 20 29 30 /* 31 * In the NFA engine: how many states are allowed 32 */ 33 #define NFA_MAX_STATES 100000 34 #define NFA_TOO_EXPENSIVE -1 35 36 // Which regexp engine to use? Needed for vim_regcomp(). 37 // Must match with 'regexpengine'. 38 #define AUTOMATIC_ENGINE 0 39 #define BACKTRACKING_ENGINE 1 40 #define NFA_ENGINE 2 41 42 typedef struct regengine regengine_T; 43 44 /* 45 * Structure returned by vim_regcomp() to pass on to vim_regexec(). 46 * This is the general structure. For the actual matcher, two specific 47 * structures are used. See code below. 48 */ 49 typedef struct regprog 50 { 51 regengine_T *engine; 52 unsigned regflags; 53 unsigned re_engine; // automatic, backtracking or nfa engine 54 unsigned re_flags; // second argument for vim_regcomp() 55 int re_in_use; // prog is being executed 56 } regprog_T; 57 58 /* 59 * Structure used by the back track matcher. 60 * These fields are only to be used in regexp.c! 61 * See regexp.c for an explanation. 62 */ 63 typedef struct 64 { 65 // These four members implement regprog_T 66 regengine_T *engine; 67 unsigned regflags; 68 unsigned re_engine; 69 unsigned re_flags; 70 int re_in_use; 71 72 int regstart; 73 char_u reganch; 74 char_u *regmust; 75 int regmlen; 76 #ifdef FEAT_SYN_HL 77 char_u reghasz; 78 #endif 79 char_u program[1]; // actually longer.. 80 } bt_regprog_T; 81 82 /* 83 * Structure representing a NFA state. 84 * An NFA state may have no outgoing edge, when it is a NFA_MATCH state. 85 */ 86 typedef struct nfa_state nfa_state_T; 87 struct nfa_state 88 { 89 int c; 90 nfa_state_T *out; 91 nfa_state_T *out1; 92 int id; 93 int lastlist[2]; // 0: normal, 1: recursive 94 int val; 95 }; 96 97 /* 98 * Structure used by the NFA matcher. 99 */ 100 typedef struct 101 { 102 // These three members implement regprog_T 103 regengine_T *engine; 104 unsigned regflags; 105 unsigned re_engine; 106 unsigned re_flags; 107 int re_in_use; 108 109 nfa_state_T *start; // points into state[] 110 111 int reganch; // pattern starts with ^ 112 int regstart; // char at start of pattern 113 char_u *match_text; // plain text to match with 114 115 int has_zend; // pattern contains \ze 116 int has_backref; // pattern contains \1 .. \9 117 #ifdef FEAT_SYN_HL 118 int reghasz; 119 #endif 120 char_u *pattern; 121 int nsubexp; // number of () 122 int nstate; 123 nfa_state_T state[1]; // actually longer.. 124 } nfa_regprog_T; 125 126 /* 127 * Structure to be used for single-line matching. 128 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". 129 * When there is no match, the pointer is NULL. 130 */ 131 typedef struct 132 { 133 regprog_T *regprog; 134 char_u *startp[NSUBEXP]; 135 char_u *endp[NSUBEXP]; 136 int rm_ic; 137 } regmatch_T; 138 139 /* 140 * Structure to be used for multi-line matching. 141 * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col" 142 * and ends in line "endpos[no].lnum" just before column "endpos[no].col". 143 * The line numbers are relative to the first line, thus startpos[0].lnum is 144 * always 0. 145 * When there is no match, the line number is -1. 146 */ 147 typedef struct 148 { 149 regprog_T *regprog; 150 lpos_T startpos[NSUBEXP]; 151 lpos_T endpos[NSUBEXP]; 152 int rmm_ic; 153 colnr_T rmm_maxcol; // when not zero: maximum column 154 } regmmatch_T; 155 156 /* 157 * Structure used to store external references: "\z\(\)" to "\z\1". 158 * Use a reference count to avoid the need to copy this around. When it goes 159 * from 1 to zero the matches need to be freed. 160 */ 161 typedef struct 162 { 163 short refcnt; 164 char_u *matches[NSUBEXP]; 165 } reg_extmatch_T; 166 167 struct regengine 168 { 169 regprog_T *(*regcomp)(char_u*, int); 170 void (*regfree)(regprog_T *); 171 int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, int); 172 long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, proftime_T *, int *); 173 char_u *expr; 174 }; 175 176 #endif // _REGEXP_H 177