1 /* posix.h - Posix.2 compatibility functions 2 * 3 **************************************************************** 4 * Copyright (C) 1998, 2000 Thomas Lord 5 * 6 * See the file "COPYING" for further information about 7 * the copyright and warranty status of this work. 8 */ 9 10 11 12 #ifndef INCLUDE__RX_POSIX__POSIX_H 13 #define INCLUDE__RX_POSIX__POSIX_H 14 15 16 17 #include "hackerlab/rx-posix/errnorx.h" 18 #include "hackerlab/rx-posix/dup-max.h" 19 #include "hackerlab/rx-posix/match-regexp.h" 20 21 22 23 24 /* RE_DUP_MAX (Required by Posix.2) An upper bound on the values 25 * `m' and `n' in a regexp of the form `RE{m,n}'. 26 * 27 * Posix requires this to be declared in "<limits.h>". 28 * 29 * The correct value for Rx is redefined (as a macro) in "hackerlab/rx-posix/limits.h". 30 */ 31 32 33 /* regoff_t (Required by Posix.2) Used to represent offsets to 34 * substrings within a string matched by `regexec'. 35 * `regoff_t' is a signed arithmetic type that can hold 36 * the largest value that can be stored in either `off_t' 37 * or `ssize_t'. 38 * 39 * This declaration fails to conform to Posix.2 if 40 * either `off_t' or `ssize_t' is `long long' -- but 41 * that is unlikely to be the case. 42 */ 43 typedef long regoff_t; 44 45 /* regmatch_t (Required by Posix.2). Offsets to substrings matched 46 * by parenthesized subexpressions. 47 * 48 * This structure includes the (required) fields: 49 * 50 * regoff_t rm_so; Byte offset from start of string to 51 * start of substring. 52 * regoff_t rm_eo; Byte offset from start of string to 53 * the first character after the end 54 * of substring. 55 */ 56 typedef struct rx_registers regmatch_t; 57 58 /* regex_t (Required by Posix.2) A compiled regexp, filled in 59 * by `regcomp'. 60 */ 61 typedef struct rx_posix_regex 62 { 63 /* re_nsub The number of parenthesized subexpressions. 64 * Filled in by `regcomp'. 65 * 66 * This field is required by Posix.2 67 * 68 */ 69 size_t re_nsub; 70 71 72 /**************************************************************** 73 * The remaining fields are implementation specific and are 74 * filled in by `regcomp'. 75 */ 76 77 /* pattern The expression tree for the pattern. 78 * The reference count for the expression 79 * is incremented for `pattern'. 80 */ 81 struct rx_exp_node * pattern; 82 83 /* subexps Pointers to the `re_nsub' parenthesized subexpressions 84 * of pattern. The reference counts are NOT incremented 85 * for these references. 86 */ 87 struct rx_exp_node ** subexps; 88 89 90 /* icase non-0 iff regcomp was passed REG_ICASE 91 */ 92 int icase; 93 94 /* translate 0 or a 256 element mapping from characters to characters. 95 * Regcomp reads the pattern source string through this table 96 * and compiles a pattern that acts as if the target string 97 * is translated through this table. Used to implement 98 * REG_ICASE. 99 */ 100 t_uchar * translate; 101 102 /* newline_anchor If not 0, ^ matches after \n, $ before \n. 103 * Set by passing REG_NEWLINE to `regcomp'. 104 */ 105 t_uchar newline_anchor; /* If true, an anchor at a newline matches.*/ 106 107 /* is_left_anchored If not 0, the pattern effectively begins with `^'. 108 * For example, "(^abc)|(^def)" is anchored. 109 * 110 * is_right_anchored Similarly for `$'. 111 */ 112 int is_left_anchored; 113 int is_right_anchored; 114 115 /* is_nullable If not 0, the pattern can match the empty string. 116 */ 117 t_uchar is_nullable; 118 119 /* no_sub If not 0, REG_NOSUB was passed to regcomp. 120 */ 121 t_uchar no_sub; 122 123 124 /* fastmap If `fastmap[c]' is 0, the pattern can not match 125 * a string that begins with character `c'. 126 */ 127 t_uchar fastmap[256]; 128 129 /* small_p If not 0, the pattern is "trivial" in such a way that 130 * the cost of pattern->NFA->DFA conversion should be 131 * avoided during matching in favor of matching based 132 * only on the expression tree for the pattern. 133 */ 134 int small_p; 135 136 /* owner_data Arbitrary data, available to whoever called 137 * regcomp. Do not rely on this feature. 138 */ 139 void * owner_data; 140 } regex_t; 141 142 143 144 /* enum rx_cflags 145 * 146 * Values which can be combined by a bitwise inclusive OR to 147 * form the `cflags' argument for `regcomp'. 148 * 149 * Most of these values are required by Posix.2. `REG_DFA_ONLY' is an 150 * implementation-specific flag and should be avoided by programs 151 * which are intended to be portable between implementations of Posix. 152 */ 153 enum rx_cflags 154 { 155 REG_EXTENDED = 1, 156 /* If REG_EXTENDED is set, then use extended regular expression 157 (ERE) syntax. If not set, then use basic regular expression 158 (BRE) syntax. In extended syntax, none of the regexp operators 159 are written with a backslash. */ 160 161 162 REG_ICASE = (REG_EXTENDED << 1), 163 /* If REG_ICASE is set, then ignore case when matching. 164 If not set, then case is significant. */ 165 166 167 REG_NOSUB = (REG_ICASE << 1), 168 /* Report only success/failure in regexec. */ 169 170 REG_NEWLINE = (REG_NOSUB << 1), 171 /* If REG_NEWLINE is set, then "." and complemented character 172 sets do not match at newline characters in the string. Also, 173 "^" and "$" do match at newlines. 174 175 If not set, then anchors do not match at newlines 176 and complimented character sets ordinarily contain newline. */ 177 178 179 REG_DFA_ONLY = (REG_NEWLINE << 1), 180 /* If this bit is set, then restrict the pattern language to patterns 181 that compile to efficient state machines. 182 183 This is a non-standard feature. */ 184 }; 185 186 187 /* enum rx_eflags 188 * 189 * Values which can be combined by a bitwise inclusive OR to 190 * form the `eflags' argument for `regexec'. 191 * 192 * Flags required by Posix.2: 193 * 194 * REG_NOTBOL 195 * REG_NOTEOL 196 * 197 * Implementation-specific (non-standard) flags: 198 * 199 * REG_NO_SUBEXP_REPORTING 200 * REG_ALLOC_REGS 201 */ 202 enum rx_eflags 203 { 204 REG_NOTBOL = 1, 205 /* If REG_NOTBOL is set, then the beginning-of-line operator `^' 206 * doesn't match the beginning of the input string (presumably 207 * because it's not the beginning of a line). If not set, then the 208 * beginning-of-line operator does match the beginning of the 209 * string. 210 * 211 * (Required by Posix.2) 212 */ 213 214 REG_NOTEOL = (REG_NOTBOL << 1), 215 /* REG_NOTEOL is similar to REG_NOTBOL, except that it applies to 216 * the end-of-line operator `$' and the end of the input string. 217 * 218 * (Required by Posix.2) 219 */ 220 221 REG_NO_SUBEXP_REPORTING = (REG_NOTEOL << 1), 222 /* REG_NO_SUBEXP_REPORTING causes `regexec' to fill in only 223 * `pmatch[0]' and to ignore other elements of `pmatch'. For some 224 * patterns (those which do not contain back-references or anchors) 225 * this can speed up matching considerably. 226 * 227 * (non-standard) 228 */ 229 230 REG_ALLOC_REGS = (REG_NO_SUBEXP_REPORTING << 1), 231 /* REG_ALLOC_REGS is only used by `regnexec'. It causes `regnexec' to allocate storage 232 * for `regmatch_t' values. 233 * 234 * (non-standard) 235 */ 236 }; 237 238 239 240 /* automatically generated __STDC__ prototypes */ 241 extern int regcomp (regex_t * preg, const char * pattern, int cflags); 242 extern int regncomp (regex_t * preg, 243 const char * pattern, 244 size_t len, 245 int cflags); 246 extern void regfree (regex_t *preg); 247 extern size_t regerror (int errcode, 248 const regex_t *preg, 249 char *errbuf, 250 size_t errbuf_size); 251 extern int regexec (const regex_t *preg, 252 const char *string, 253 size_t nmatch, 254 regmatch_t pmatch[], 255 int eflags); 256 extern int regnexec (const regex_t *preg, 257 const char *string, 258 regoff_t len, 259 size_t nmatch, 260 regmatch_t **pmatch, 261 int eflags); 262 #endif /* INCLUDE__RX_POSIX__POSIX_H */ 263