#ifndef _REGEX_H_
#define	_REGEX_H_	/* never again */
/*
 * regular expressions
 *
 * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
 *
 * Development of this software was funded, in part, by Cray Research Inc.,
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
 * Corporation, none of whom are responsible for the results.  The author
 * thanks all of them.
 *
 * Redistribution and use in source and binary forms -- with or without
 * modification -- are permitted for any purpose, provided that
 * redistributions in source form retain this entire copyright notice and
 * indicate the origin and nature of any modifications.
 *
 * I'd appreciate being given credit for this package in the documentation
 * of software which uses it, but that is not a requirement.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 *
 * Prototypes etc. marked with "^" within comments get gathered up (and
 * possibly edited) by the regfwd program and inserted near the bottom of
 * this file.
 *
 * We offer the option of declaring one wide-character version of the
 * RE functions as well as the char versions.  To do that, define
 * __REG_WIDE_T to the type of wide characters (unfortunately, there
 * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
 * __REG_WIDE_EXEC to the names to be used for the compile and execute
 * functions (suggestion:  re_Xcomp and re_Xexec, where X is a letter
 * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
 * For cranky old compilers, it may be necessary to do something like:
 * #define	__REG_WIDE_COMPILE(a,b,c,d)	re_Xcomp(a,b,c,d)
 * #define	__REG_WIDE_EXEC(a,b,c,d,e,f,g)	re_Xexec(a,b,c,d,e,f,g)
 * rather than just #defining the names as parameterless macros.
 *
 * For some specialized purposes, it may be desirable to suppress the
 * declarations of the "front end" functions, regcomp() and regexec(),
 * or of the char versions of the compile and execute functions.  To
 * suppress the front-end functions, define __REG_NOFRONT.  To suppress
 * the char versions, define __REG_NOCHAR.
 *
 * The right place to do those defines (and some others you may want, see
 * below) would be <sys/types.h>.  If you don't have control of that file,
 * the right place to add your own defines to this file is marked below.
 * This is normally done automatically, by the makefile and regmkhdr, based
 * on the contents of regcustom.h.
 */

/*
 * Modification note: <stddef.h> is included here so that size_t, which is
 * used by regex_t and by every prototype below, is declared even when this
 * header is the first one a translation unit includes.  It is pulled in
 * before the extern "C" block so the system header is not wrapped by it.
 */
#include <stddef.h>



/*
 * voodoo for C++
 */
#ifdef __cplusplus
extern "C" {
#endif



/*
 * Add your own defines, if needed, here.
 */



/*
 * Location where a chunk of regcustom.h is automatically spliced into
 * this file (working from its prototype, regproto.h).
 */
/* --- begin --- */
/* ensure certain things don't sneak in from system headers */
#ifdef __REG_WIDE_T
#undef __REG_WIDE_T
#endif
#ifdef __REG_WIDE_COMPILE
#undef __REG_WIDE_COMPILE
#endif
#ifdef __REG_WIDE_EXEC
#undef __REG_WIDE_EXEC
#endif
#ifdef __REG_REGOFF_T
#undef __REG_REGOFF_T
#endif
#ifdef __REG_VOID_T
#undef __REG_VOID_T
#endif
#ifdef __REG_CONST
#undef __REG_CONST
#endif
#ifdef __REG_NOFRONT
#undef __REG_NOFRONT
#endif
#ifdef __REG_NOCHAR
#undef __REG_NOCHAR
#endif
#if wxUSE_UNICODE
#   define  __REG_WIDE_T        wxChar
#   define  __REG_WIDE_COMPILE  wx_re_comp
#   define  __REG_WIDE_EXEC     wx_re_exec
#   define  __REG_NOCHAR        /* don't want the char versions */
#endif
#define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
#define	_ANSI_ARGS_(x)		x
/* --- end --- */


/*
 * interface types etc.
 */

/*
 * regoff_t has to be large enough to hold either off_t or ssize_t,
 * and must be signed; it's only a guess that long is suitable, so we
 * offer <sys/types.h> an override.
 */
#ifdef __REG_REGOFF_T
typedef __REG_REGOFF_T regoff_t;
#else
typedef long regoff_t;
#endif

/*
 * For benefit of old compilers, we offer <sys/types.h> the option of
 * overriding the `void' type used to declare nonexistent return types.
 */
#ifdef __REG_VOID_T
typedef __REG_VOID_T re_void;
#else
typedef void re_void;
#endif

/*
 * Also for benefit of old compilers, <sys/types.h> can supply a macro
 * which expands to a substitute for `const'.
 */
#ifndef __REG_CONST
#define	__REG_CONST	const
#endif



/*
 * other interface types
 */

/* the biggie, a compiled RE (or rather, a front end to same) */
typedef struct {
	int re_magic;		/* magic number */
	size_t re_nsub;		/* number of subexpressions */
	long re_info;		/* information about RE */
	/* bit flags that may be set in re_info (octal constants) */
#		define	REG_UBACKREF		000001
#		define	REG_ULOOKAHEAD		000002
#		define	REG_UBOUNDS		000004
#		define	REG_UBRACES		000010
#		define	REG_UBSALNUM		000020
#		define	REG_UPBOTCH		000040
#		define	REG_UBBS		000100
#		define	REG_UNONPOSIX		000200
#		define	REG_UUNSPEC		000400
#		define	REG_UUNPORT		001000
#		define	REG_ULOCALE		002000
#		define	REG_UEMPTYMATCH		004000
#		define	REG_UIMPOSSIBLE		010000
#		define	REG_USHORTEST		020000
	int re_csize;		/* sizeof(character) */
	char *re_endp;		/* backward compatibility kludge */
	/* the rest is opaque pointers to hidden innards */
	char *re_guts;		/* `char *' is more portable than `void *' */
	char *re_fns;
} regex_t;

/* result reporting (may acquire more fields later) */
typedef struct {
	regoff_t rm_so;		/* start of substring */
	regoff_t rm_eo;		/* end of substring */
} regmatch_t;

/* supplementary control and reporting */
typedef struct {
	regmatch_t rm_extend;	/* see REG_EXPECT */
} rm_detail_t;



/*
 * compilation
 ^ #ifndef __REG_NOCHAR
 ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
 ^ #endif
 ^ #ifndef __REG_NOFRONT
 ^ int regcomp(regex_t *, __REG_CONST char *, int);
 ^ #endif
 ^ #ifdef __REG_WIDE_T
 ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
 ^ #endif
 */
#define	REG_BASIC	000000	/* BREs (convenience) */
#define	REG_EXTENDED	000001	/* EREs */
#define	REG_ADVF	000002	/* advanced features in EREs */
#define	REG_ADVANCED	000003	/* AREs (which are also EREs) */
#define	REG_QUOTE	000004	/* no special characters, none */
#define	REG_NOSPEC	REG_QUOTE	/* historical synonym */
#define	REG_ICASE	000010	/* ignore case */
#define	REG_NOSUB	000020	/* don't care about subexpressions */
#define	REG_EXPANDED	000040	/* expanded format, white space & comments */
#define	REG_NLSTOP	000100	/* \n doesn't match . or [^ ] */
#define	REG_NLANCH	000200	/* ^ matches after \n, $ before */
#define	REG_NEWLINE	000300	/* newlines are line terminators */
#define	REG_PEND	000400	/* ugh -- backward-compatibility hack */
#define	REG_EXPECT	001000	/* report details on partial/limited matches */
#define	REG_BOSONLY	002000	/* temporary kludge for BOS-only matches */
#define	REG_DUMP	004000	/* none of your business :-) */
#define	REG_FAKE	010000	/* none of your business :-) */
#define	REG_PROGRESS	020000	/* none of your business :-) */



/*
 * execution
 ^ #ifndef __REG_NOCHAR
 ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
 ^				rm_detail_t *, size_t, regmatch_t [], int);
 ^ #endif
 ^ #ifndef __REG_NOFRONT
 ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
 ^ #endif
 ^ #ifdef __REG_WIDE_T
 ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
 ^				rm_detail_t *, size_t, regmatch_t [], int);
 ^ #endif
 */
#define	REG_NOTBOL	0001	/* BOS is not BOL */
#define	REG_NOTEOL	0002	/* EOS is not EOL */
#define	REG_STARTEND	0004	/* backward compatibility kludge */
#define	REG_FTRACE	0010	/* none of your business */
#define	REG_MTRACE	0020	/* none of your business */
#define	REG_SMALL	0040	/* none of your business */



/*
 * misc generics (may be more functions here eventually)
 ^ re_void regfree(regex_t *);
 */



/*
 * error reporting
 * Be careful if modifying the list of error codes -- the table used by
 * regerror() is generated automatically from this file!
 *
 * Note that there is no wide-char variant of regerror at this time; what
 * kind of character is used for error reports is independent of what kind
 * is used in matching.
 *
 * The value 14 is not assigned to any code in the list below.
 *
 ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
 */
#define	REG_OKAY	 0	/* no errors detected */
#define	REG_NOMATCH	 1	/* failed to match */
#define	REG_BADPAT	 2	/* invalid regexp */
#define	REG_ECOLLATE	 3	/* invalid collating element */
#define	REG_ECTYPE	 4	/* invalid character class */
#define	REG_EESCAPE	 5	/* invalid escape \ sequence */
#define	REG_ESUBREG	 6	/* invalid backreference number */
#define	REG_EBRACK	 7	/* brackets [] not balanced */
#define	REG_EPAREN	 8	/* parentheses () not balanced */
#define	REG_EBRACE	 9	/* braces {} not balanced */
#define	REG_BADBR	10	/* invalid repetition count(s) */
#define	REG_ERANGE	11	/* invalid character range */
#define	REG_ESPACE	12	/* out of memory */
#define	REG_BADRPT	13	/* quantifier operand invalid */
#define	REG_ASSERT	15	/* "can't happen" -- you found a bug */
#define	REG_INVARG	16	/* invalid argument to regex function */
#define	REG_MIXED	17	/* character widths of regex and string differ */
#define	REG_BADOPT	18	/* invalid embedded option */
/* two specials for debugging and testing */
#define	REG_ATOI	101	/* convert error-code name to number */
#define	REG_ITOA	102	/* convert error-code number to name */



/*
 * the prototypes, as possibly munched by regfwd
 */
/* =====^!^===== begin forwards =====^!^===== */
/* automatically gathered by fwd; do not hand-edit */
/* === regproto.h === */
#ifndef __REG_NOCHAR
int wx_re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
#endif
#ifndef __REG_NOFRONT
int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
#endif
#ifndef __REG_NOCHAR
int wx_re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
#ifndef __REG_NOFRONT
int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
re_void wx_regfree _ANSI_ARGS_((regex_t *));
extern size_t wx_regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */



/*
 * more C++ voodoo
 */
#ifdef __cplusplus
}
#endif



#endif