1*63eb84d1Schristos /* Definitions for data structures and routines for the regular
2*63eb84d1Schristos    expression library.
3*63eb84d1Schristos    Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
4*63eb84d1Schristos    Free Software Foundation, Inc.
5*63eb84d1Schristos    This file is part of the GNU C Library.
6*63eb84d1Schristos 
7*63eb84d1Schristos    This program is free software; you can redistribute it and/or modify
8*63eb84d1Schristos    it under the terms of the GNU General Public License as published by
9*63eb84d1Schristos    the Free Software Foundation; either version 2, or (at your option)
10*63eb84d1Schristos    any later version.
11*63eb84d1Schristos 
12*63eb84d1Schristos    This program is distributed in the hope that it will be useful,
13*63eb84d1Schristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
14*63eb84d1Schristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15*63eb84d1Schristos    GNU General Public License for more details.
16*63eb84d1Schristos 
17*63eb84d1Schristos    You should have received a copy of the GNU General Public License along
18*63eb84d1Schristos    with this program; if not, write to the Free Software Foundation,
19*63eb84d1Schristos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
20*63eb84d1Schristos 
21*63eb84d1Schristos #ifndef _REGEX_H
22*63eb84d1Schristos #define _REGEX_H 1
23*63eb84d1Schristos 
24*63eb84d1Schristos #include <sys/types.h>
25*63eb84d1Schristos 
26*63eb84d1Schristos /* Allow the use in C++ code.  */
27*63eb84d1Schristos #ifdef __cplusplus
28*63eb84d1Schristos extern "C" {
29*63eb84d1Schristos #endif
30*63eb84d1Schristos 
31*63eb84d1Schristos /* POSIX says that <sys/types.h> must be included (by the caller) before
32*63eb84d1Schristos    <regex.h>.  */
33*63eb84d1Schristos 
34*63eb84d1Schristos #if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
35*63eb84d1Schristos /* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
36*63eb84d1Schristos    should be there.  */
37*63eb84d1Schristos # include <stddef.h>
38*63eb84d1Schristos #endif
39*63eb84d1Schristos 
/* The following two types must be a signed and an unsigned integer
   type, respectively, each wide enough to hold the value of a pointer.
   With most ANSI compilers ptrdiff_t and size_t would probably do, but
   the size of those two types is only 2 under Microsoft C.  Ugh...  */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;
46*63eb84d1Schristos 
/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
   If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then a range whose starting point collates
     higher than its ending point, as in [z-a], is invalid.
   If not set, then such a range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case. */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
171*63eb84d1Schristos 
/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  It may be changed either by plain
   assignment or through `re_set_syntax', and it is the syntax that
   `re_compile_pattern' compiles with.  */
extern reg_syntax_t re_syntax_options;
177*63eb84d1Schristos 
/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK							\
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

/* RE_SYNTAX_POSIX_EXTENDED is defined further down; that is fine because
   a macro body is only expanded where the macro is used.  */
#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
       | RE_CONTEXT_INVALID_OPS ))

/* RE_INTERVALS is already part of RE_SYNTAX_POSIX_EXTENDED (through
   _RE_SYNTAX_POSIX_COMMON); naming it again here is redundant but
   harmless.  */
#define RE_SYNTAX_POSIX_AWK 						\
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */
248*63eb84d1Schristos 
/* Largest repeat count an interval may specify.  Some systems
   (erroneously) provide their own RE_DUP_MAX in other headers; we want
   our value, so drop whatever definition may already be in effect
   (#undef of a name that is not a macro is a no-op).  */
#undef RE_DUP_MAX
/* Written as a literal: if sizeof(int) == 2, ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)
257*63eb84d1Schristos 
258*63eb84d1Schristos 
/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (1 << 1)

/* If this bit is set, then newline is treated specially, as POSIX
     specifies for regcomp: anchors match at newline characters in the
     string (^ just after one, $ just before one), and newline is not
     matched by . or by a nonmatching list.
   If not set, then newline is an ordinary character and anchors match
     only at the ends of the string.  */
#define REG_NEWLINE (1 << 2)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then regexec also reports the match offsets asked for
     through its nmatch/pmatch arguments (per POSIX).  */
#define REG_NOSUB (1 << 3)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)
290*63eb84d1Schristos 
291*63eb84d1Schristos 
/* Error codes used by the regex routines.  The numbering is spelled out
   explicitly below.  Whenever a code is removed, changed, or added,
   the `re_error_msg' table in regex.c has to be updated to match.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH = 1,	/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT = 2,	/* Invalid pattern.  */
  REG_ECOLLATE = 3,	/* Not implemented.  */
  REG_ECTYPE = 4,	/* Invalid character class name.  */
  REG_EESCAPE = 5,	/* Trailing backslash.  */
  REG_ESUBREG = 6,	/* Invalid back reference.  */
  REG_EBRACK = 7,	/* Unmatched left bracket.  */
  REG_EPAREN = 8,	/* Parenthesis imbalance.  */
  REG_EBRACE = 9,	/* Unmatched \{.  */
  REG_BADBR = 10,	/* Invalid contents of \{\}.  */
  REG_ERANGE = 11,	/* Invalid range end.  */
  REG_ESPACE = 12,	/* Ran out of memory.  */
  REG_BADRPT = 13,	/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND = 14,	/* Premature end.  */
  REG_ESIZE = 15,	/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN = 16	/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;
323*63eb84d1Schristos 
/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

/* Type of the `translate' member below.  A different type can be
   selected by defining RE_TRANSLATE_TYPE before this point.  */
#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE char *
#endif

struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
        /* Space that holds the compiled pattern.  It is declared as
           `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

        /* Number of bytes to which `buffer' points.  */
  unsigned long int allocated;

        /* Number of bytes actually used in `buffer'.  */
  unsigned long int used;

        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;

        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
  RE_TRANSLATE_TYPE translate;

        /* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
           this absolutely perfectly; see `re_compile_fastmap' (the
           `duplicate' case).  */
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
  unsigned fastmap_accurate : 1;

        /* If set, `re_match_2' does not return information about
           subexpressions.  */
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
           beginning of the string.  */
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

/* The POSIX name for the pattern buffer.  */
typedef struct re_pattern_buffer regex_t;
403*63eb84d1Schristos 
/* Byte offset within the string being matched.  POSIX mandates this
   type.  */
typedef int regoff_t;


/* Register match data, kept as a structure of arrays: `num_regs'
   entries in each of `start' and `end'.  See regex.texinfo for a full
   description of what registers match.  */
struct re_registers
{
  unsigned int num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* When the pattern buffer's `regs_allocated' is REGS_UNALLOCATED,
   `re_match_2' returns information about at least this many registers
   the first time it is handed a `regs' structure.  */
#if !defined RE_NREGS
# define RE_NREGS 30
#endif


/* The POSIX flavour of register data.  Besides using different names
   than `re_registers', POSIX keeps an array of structures rather than
   a structure of arrays.  */
typedef struct
{
  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_so;
  /* Byte offset from string's start to substring's end.  */
  regoff_t rm_eo;
} regmatch_t;
434*63eb84d1Schristos 
/* Declarations for routines.  */

/* Sets the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern (const char *pattern, size_t length,
				       struct re_pattern_buffer *buffer);


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap (struct re_pattern_buffer *buffer);


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is
   zero).  */
extern int re_search (struct re_pattern_buffer *buffer, const char *string,
		      int length, int start, int range,
		      struct re_registers *regs);


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1,
			int length1, const char *string2, int length2,
			int start, int range, struct re_registers *regs,
			int stop);


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match (struct re_pattern_buffer *buffer, const char *string,
		     int length, int start, struct re_registers *regs);


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1,
		       int length1, const char *string2, int length2,
		       int start, struct re_registers *regs, int stop);


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without freeing the
   old data.  */
extern void re_set_registers (struct re_pattern_buffer *buffer,
			      struct re_registers *regs, unsigned num_regs,
			      regoff_t *starts, regoff_t *ends);
499*63eb84d1Schristos 
/* The BSD entry points are declared only when _REGEX_RE_COMP or _LIBC
   is defined, and never when _CRAY is defined.  */
#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility.  */
extern char *re_comp (const char *);
extern int re_exec (const char *);
# endif
#endif
507*63eb84d1Schristos 
/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".
   For anything that is not GCC 2.95 or later, map __restrict onto
   `restrict' when that is available (as a macro, or because the
   compiler claims C99), and otherwise define it away.  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif
/* gcc 3.1 and up support the C99 [restrict] array-parameter syntax,
   but only when compiling C.  C++ has no such syntax, so
   __restrict_arr must expand to nothing there; otherwise a parameter
   declared as `x[__restrict_arr]' would not parse for C++ callers of
   this header.  */
#ifndef __restrict_arr
# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
     && !defined __cplusplus
#  define __restrict_arr __restrict
# else
#  define __restrict_arr
# endif
#endif
527*63eb84d1Schristos 
/* POSIX compatibility.  __cflags takes the POSIX `cflags' bits
   (REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NOSUB) and __eflags the
   POSIX `eflags' bits (REG_NOTBOL, REG_NOTEOL); the error codes are
   those of reg_errcode_t.  */
extern int regcomp (regex_t *__restrict __preg,
		    const char *__restrict __pattern,
		    int __cflags);

extern int regexec (const regex_t *__restrict __preg,
		    const char *__restrict __string, size_t __nmatch,
		    regmatch_t __pmatch[__restrict_arr],
		    int __eflags);

extern size_t regerror (int __errcode, const regex_t *__preg,
			char *__errbuf, size_t __errbuf_size);

extern void regfree (regex_t *__preg);
542*63eb84d1Schristos 
543*63eb84d1Schristos 
544*63eb84d1Schristos #ifdef __cplusplus
545*63eb84d1Schristos }
546*63eb84d1Schristos #endif	/* C++ */
547*63eb84d1Schristos 
548*63eb84d1Schristos #endif /* regex.h */
549*63eb84d1Schristos 
550*63eb84d1Schristos /*
551*63eb84d1Schristos Local variables:
552*63eb84d1Schristos make-backup-files: t
553*63eb84d1Schristos version-control: t
554*63eb84d1Schristos trim-versions-without-asking: nil
555*63eb84d1Schristos End:
556*63eb84d1Schristos */
557