1 /* 2 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE 3 * 4 * This is NOT the original regular expression code as written by Henry 5 * Spencer. This code has been modified specifically for use with Vim, and 6 * should not be used apart from compiling Vim. If you want a good regular 7 * expression library, get the original code. 8 * 9 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE 10 */ 11 12 #ifndef NVIM_REGEXP_DEFS_H 13 #define NVIM_REGEXP_DEFS_H 14 15 #include <stdbool.h> 16 17 #include "nvim/pos.h" 18 #include "nvim/profile.h" 19 #include "nvim/types.h" 20 21 /* 22 * The number of sub-matches is limited to 10. 23 * The first one (index 0) is the whole match, referenced with "\0". 24 * The second one (index 1) is the first sub-match, referenced with "\1". 25 * This goes up to the tenth (index 9), referenced with "\9". 26 */ 27 #define NSUBEXP 10 28 29 /* 30 * In the NFA engine: how many braces are allowed. 31 * TODO(RE): Use dynamic memory allocation instead of static, like here 32 */ 33 #define NFA_MAX_BRACES 20 34 35 // In the NFA engine: how many states are allowed. 36 #define NFA_MAX_STATES 100000 37 #define NFA_TOO_EXPENSIVE -1 38 39 // Which regexp engine to use? Needed for vim_regcomp(). 40 // Must match with 'regexpengine'. 41 #define AUTOMATIC_ENGINE 0 42 #define BACKTRACKING_ENGINE 1 43 #define NFA_ENGINE 2 44 45 typedef struct regengine regengine_T; 46 typedef struct regprog regprog_T; 47 typedef struct reg_extmatch reg_extmatch_T; 48 49 /// Structure to be used for multi-line matching. 50 /// Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col" 51 /// and ends in line "endpos[no].lnum" just before column "endpos[no].col". 52 /// The line numbers are relative to the first line, thus startpos[0].lnum is 53 /// always 0. 54 /// When there is no match, the line number is -1. 55 typedef struct { 56 regprog_T *regprog; 57 lpos_T startpos[NSUBEXP]; 58 lpos_T endpos[NSUBEXP]; 59 int rmm_ic; 60 colnr_T rmm_maxcol; /// when not zero: maximum column 61 } regmmatch_T; 62 63 #include "nvim/buffer_defs.h" 64 65 /* 66 * Structure returned by vim_regcomp() to pass on to vim_regexec(). 67 * This is the general structure. For the actual matcher, two specific 68 * structures are used. See code below. 69 */ 70 struct regprog { 71 regengine_T *engine; 72 unsigned regflags; 73 unsigned re_engine; ///< Automatic, backtracking or NFA engine. 74 unsigned re_flags; ///< Second argument for vim_regcomp(). 75 bool re_in_use; ///< prog is being executed 76 }; 77 78 /* 79 * Structure used by the back track matcher. 80 * These fields are only to be used in regexp.c! 81 * See regexp.c for an explanation. 82 */ 83 typedef struct { 84 // These four members implement regprog_T. 85 regengine_T *engine; 86 unsigned regflags; 87 unsigned re_engine; 88 unsigned re_flags; 89 bool re_in_use; 90 91 int regstart; 92 char_u reganch; 93 char_u *regmust; 94 int regmlen; 95 char_u reghasz; 96 char_u program[1]; // actually longer.. 97 } bt_regprog_T; 98 99 // Structure representing a NFA state. 100 // An NFA state may have no outgoing edge, when it is a NFA_MATCH state. 101 typedef struct nfa_state nfa_state_T; 102 struct nfa_state { 103 int c; 104 nfa_state_T *out; 105 nfa_state_T *out1; 106 int id; 107 int lastlist[2]; // 0: normal, 1: recursive 108 int val; 109 }; 110 111 /* 112 * Structure used by the NFA matcher. 113 */ 114 typedef struct { 115 // These four members implement regprog_T. 116 regengine_T *engine; 117 unsigned regflags; 118 unsigned re_engine; 119 unsigned re_flags; 120 bool re_in_use; 121 122 nfa_state_T *start; // points into state[] 123 124 int reganch; // pattern starts with ^ 125 int regstart; // char at start of pattern 126 char_u *match_text; // plain text to match with 127 128 int has_zend; // pattern contains \ze 129 int has_backref; // pattern contains \1 .. \9 130 int reghasz; 131 char_u *pattern; 132 int nsubexp; // number of () 133 int nstate; 134 nfa_state_T state[1]; // actually longer.. 135 } nfa_regprog_T; 136 137 /* 138 * Structure to be used for single-line matching. 139 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". 140 * When there is no match, the pointer is NULL. 141 */ 142 typedef struct { 143 regprog_T *regprog; 144 char_u *startp[NSUBEXP]; 145 char_u *endp[NSUBEXP]; 146 bool rm_ic; 147 } regmatch_T; 148 149 /* 150 * Structure used to store external references: "\z\(\)" to "\z\1". 151 * Use a reference count to avoid the need to copy this around. When it goes 152 * from 1 to zero the matches need to be freed. 153 */ 154 struct reg_extmatch { 155 int16_t refcnt; 156 char_u *matches[NSUBEXP]; 157 }; 158 159 struct regengine { 160 regprog_T *(*regcomp)(char_u *, int); 161 void (*regfree)(regprog_T *); 162 int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, bool); 163 long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, 164 proftime_T *, int *); 165 char_u *expr; 166 }; 167 168 #endif // NVIM_REGEXP_DEFS_H 169