/* Definitions for data structures and routines for the regular
   expression library.
   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
   Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifndef _REGEX_H
#define _REGEX_H 1

#include <sys/types.h>

/* Allow the use in C++ code.  */
#ifdef __cplusplus
extern "C" {
#endif

/* POSIX says that <sys/types.h> must be included (by the caller) before
   <regex.h>.  */

#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
# include <stddef.h>
#endif

/* The following two types have to be signed and unsigned integer type
   wide enough to hold a value of a pointer.  For most ANSI compilers
   ptrdiff_t and size_t should be likely OK.  Still size of these two
   types is 2 for Microsoft C.  Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
  If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then a starting range point collating higher
     than the ending range point, as in [z-a], is invalid.
   If not set, then when the starting range point collates higher than
     the ending range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case. */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;

/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK							\
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
       | RE_CONTEXT_INVALID_OPS ))

#define RE_SYNTAX_POSIX_AWK						\
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors do not match at newline
     characters in the string.
   If not set, then anchors do match at newlines.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)


/* If any error codes are removed, changed, or added, update the
   `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,		/* Invalid pattern.  */
  REG_ECOLLATE,		/* Not implemented.  */
  REG_ECTYPE,		/* Invalid character class name.  */
  REG_EESCAPE,		/* Trailing backslash.  */
  REG_ESUBREG,		/* Invalid back reference.  */
  REG_EBRACK,		/* Unmatched left bracket.  */
  REG_EPAREN,		/* Parenthesis imbalance.  */
  REG_EBRACE,		/* Unmatched \{.  */
  REG_BADBR,		/* Invalid contents of \{\}.  */
  REG_ERANGE,		/* Invalid range end.  */
  REG_ESPACE,		/* Ran out of memory.  */
  REG_BADRPT,		/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,		/* Premature end.  */
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;

/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE char *
#endif

struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
	/* Space that holds the compiled pattern.  It is declared as
          `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

	/* Number of bytes to which `buffer' points.  */
  unsigned long int allocated;

	/* Number of bytes actually used in `buffer'.  */
  unsigned long int used;

        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;

        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
  RE_TRANSLATE_TYPE translate;

	/* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
           this absolutely perfectly; see `re_compile_fastmap' (the
           `duplicate' case).  */
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
  unsigned fastmap_accurate : 1;

        /* If set, `re_match_2' does not return information about
           subexpressions.  */
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
           beginning of the string.  */
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;

/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Declarations for routines.  */

/* Sets the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern (const char *pattern, size_t length,
				       struct re_pattern_buffer *buffer);


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap (struct re_pattern_buffer *buffer);


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
extern int re_search (struct re_pattern_buffer *buffer, const char *string,
		      int length, int start, int range,
		      struct re_registers *regs);


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1,
			int length1, const char *string2, int length2,
			int start, int range, struct re_registers *regs,
			int stop);


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match (struct re_pattern_buffer *buffer, const char *string,
		     int length, int start, struct re_registers *regs);


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1,
		       int length1, const char *string2, int length2,
		       int start, struct re_registers *regs, int stop);


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   PATTERN_BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers (struct re_pattern_buffer *buffer,
			      struct re_registers *regs, unsigned num_regs,
			      regoff_t *starts, regoff_t *ends);

#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility.  */
extern char *re_comp (const char *);
extern int re_exec (const char *);
# endif
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif
/* gcc 3.1 and up support the [restrict] syntax.  */
#ifndef __restrict_arr
# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
#  define __restrict_arr __restrict
# else
#  define __restrict_arr
# endif
#endif

/* POSIX compatibility.  */

/* POSIX pattern compiler; __CFLAGS is a combination of the `cflags'
   bits (REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NOSUB).  */
extern int regcomp (regex_t *__restrict __preg,
		    const char *__restrict __pattern,
		    int __cflags);

/* POSIX matcher; __EFLAGS is a combination of the `eflags' bits
   (REG_NOTBOL, REG_NOTEOL).  */
extern int regexec (const regex_t *__restrict __preg,
		    const char *__restrict __string, size_t __nmatch,
		    regmatch_t __pmatch[__restrict_arr],
		    int __eflags);

extern size_t regerror (int __errcode, const regex_t *__preg,
			char *__errbuf, size_t __errbuf_size);

extern void regfree (regex_t *__preg);


#ifdef __cplusplus
}
#endif	/* C++ */

#endif /* regex.h */

/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/