xref: /dragonfly/lib/libc/tre-regex/regex.h (revision c6f73aab)
1 /*
2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  */
29 
30 #ifndef _REGEX_H_
31 #define	_REGEX_H_
32 
33 #include <sys/cdefs.h>
34 #include <sys/types.h>
35 #include <wchar.h>
36 #include <xlocale.h>
37 
/*
 * Name mapping: each tre_-prefixed regex entry point listed here is
 * replaced by the preprocessor with the corresponding unprefixed name,
 * so the tre_* prototypes in this header declare the unprefixed symbols.
 */

/* NUL-terminated, narrow-character interface. */
#define	tre_regcomp		regcomp
#define	tre_regcomp_l		regcomp_l
#define	tre_regexec		regexec
#define	tre_regerror		regerror
#define	tre_regfree		regfree

/* Length-bounded narrow-character interface. */
#define	tre_regncomp		regncomp
#define	tre_regncomp_l		regncomp_l
#define	tre_regnexec		regnexec

/* Wide-character interface. */
#define	tre_regwcomp		regwcomp
#define	tre_regwcomp_l		regwcomp_l
#define	tre_regwexec		regwexec

/* Length-bounded wide-character interface. */
#define	tre_regwncomp		regwncomp
#define	tre_regwncomp_l		regwncomp_l
#define	tre_regwnexec		regwnexec
53 
/*
 * Error codes for the regex routines.  The numeric values are spelled
 * out so that the value of each code is visible at a glance; they are
 * the same values the enumerators get from implicit numbering.
 */
typedef enum {
	REG_OK		= 0,	/* No error. */
	REG_NOMATCH	= 1,	/* No match. */
	REG_BADPAT	= 2,	/* Invalid regexp. */
	REG_ECOLLATE	= 3,	/* Unknown collating element. */
	REG_ECTYPE	= 4,	/* Unknown character class name. */
	REG_EESCAPE	= 5,	/* Trailing backslash. */
	REG_ESUBREG	= 6,	/* Invalid back reference. */
	REG_EBRACK	= 7,	/* "[]" imbalance. */
	REG_EPAREN	= 8,	/* "\(\)" or "()" imbalance. */
	REG_EBRACE	= 9,	/* "\{\}" or "{}" imbalance. */
	REG_BADBR	= 10,	/* Invalid content of {}. */
	REG_ERANGE	= 11,	/* Invalid use of range operator. */
	REG_ESPACE	= 12,	/* Out of memory. */
	REG_BADRPT	= 13,	/* Invalid use of repetition operators. */
	REG_EMPTY	= 14,	/* Regexp was a zero-length string. */
	REG_INVARG	= 15,	/* Invalid argument to regex routine. */
	REG_ILLSEQ	= 16	/* Illegal byte sequence. */
} reg_errcode_t;
73 
/*
 * Configuration selectors.
 * NOTE(review): presumably these are the accepted values of the `query'
 * argument of tre_config() -- confirm against the implementation.
 */
enum {
	TRE_CONFIG_APPROX	= 0,
	TRE_CONFIG_WCHAR	= 1,
	TRE_CONFIG_MULTIBYTE	= 2,
	TRE_CONFIG_SYSTEM_ABI	= 3,
	TRE_CONFIG_VERSION	= 4
};
81 
/* Offset type used by the rm_so / rm_eo members of regmatch_t. */
typedef int	regoff_t;

/* Character type handed to tre_str_source's get_next_char callback. */
typedef wchar_t	tre_char_t;
84 
/*
 * Compiled-pattern object passed to the regex routines.
 */
typedef struct {
	/* NOTE(review): purpose not documented in this header. */
	int		 re_magic;
	/* Number of parenthesized subexpressions. */
	size_t		 re_nsub;
	/* Regex string end pointer (REG_PEND). */
	const void	*re_endp;
	/* For internal use only. */
	void		*value;
} regex_t;
91 
92 typedef struct {
93   regoff_t rm_so;
94   regoff_t rm_eo;
95 } regmatch_t;
96 
/*
 * Parameters for approximate matching: per-operation costs followed by
 * upper limits on what a match may use.
 */
typedef struct {
	/* Costs. */
	int	cost_ins;	/* Default cost of an inserted character. */
	int	cost_del;	/* Default cost of a deleted character. */
	int	cost_subst;	/* Default cost of a substituted character. */
	int	max_cost;	/* Maximum allowed cost of a match. */

	/* Limits. */
	int	max_ins;	/* Maximum allowed number of inserts. */
	int	max_del;	/* Maximum allowed number of deletes. */
	int	max_subst;	/* Maximum allowed number of substitutes. */
	int	max_err;	/* Maximum allowed number of errors total. */
} regaparams_t;
109 
110 /* Approximate matching result struct. */
111 typedef struct {
112   size_t nmatch;	/* Length of pmatch[] array. */
113   regmatch_t *pmatch;	/* Submatch data. */
114   int cost;		/* Cost of the match. */
115   int num_ins;		/* Number of inserts in the match. */
116   int num_del;		/* Number of deletes in the match. */
117   int num_subst;	/* Number of substitutes in the match. */
118 } regamatch_t;
119 
120 typedef struct {
121   int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
122   void (*rewind)(size_t pos, void *context);
123   int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
124   void *context;
125 } tre_str_source;
126 
/*
 * tre_regcomp() flags.  Every flag other than REG_BASIC is a single bit;
 * the bit position is written out explicitly.
 */

/* POSIX tre_regcomp() flags. */
#define	REG_EXTENDED	(1 << 0)
#define	REG_ICASE	(1 << 1)
#define	REG_NEWLINE	(1 << 2)
#define	REG_NOSUB	(1 << 3)

/* Extra tre_regcomp() flags. */
#define	REG_BASIC	0
#define	REG_LITERAL	(1 << 4)
#define	REG_RIGHT_ASSOC	(1 << 5)
#define	REG_UNGREEDY	(1 << 6)
#define	REG_PEND	(1 << 7)
#define	REG_ENHANCED	(1 << 8)

/* Alternative names for two of the flags above. */
#define	REG_NOSPEC	REG_LITERAL
#define	REG_MINIMAL	REG_UNGREEDY
144 
/*
 * tre_regexec() flags (the eflags argument).
 */

/* POSIX tre_regexec() flags. */
#define	REG_NOTBOL	1
#define	REG_NOTEOL	(REG_NOTBOL << 1)
#define	REG_STARTEND	(REG_NOTEOL << 1)
#define	REG_BACKR	(REG_STARTEND << 1)

/*
 * Extra tre_regexec() flags.
 *
 * REG_APPROX_MATCHER was previously defined as (REG_NOTEOL << 1), which
 * is the very same bit as REG_STARTEND, so a caller asking for one was
 * indistinguishable from a caller asking for the other.  It now uses the
 * first bit above REG_BACKR.
 *
 * REG_BACKTRACKING_MATCHER keeps the numeric value it had before, which
 * is the REG_BACKR bit.
 * NOTE(review): confirm that REG_BACKR and REG_BACKTRACKING_MATCHER are
 * meant to share a bit; the value is left untouched here so existing
 * behavior for either name does not change.
 */
#define	REG_BACKTRACKING_MATCHER REG_BACKR
#define	REG_APPROX_MATCHER	 (REG_BACKR << 1)
154 
/* Upper limit on the number of iterations in a bound expression. */
#define	RE_DUP_MAX	255

/*
 * NOTE(review): looks like a feature-test macro advertising the
 * length-bounded tre_regnexec() interface -- confirm against its users.
 */
#define	_REG_nexec	1
159 
160 __BEGIN_DECLS
161 
162 /* The POSIX.2 regexp functions */
163 int
164 tre_regcomp(regex_t *preg, const char *regex, int cflags);
165 
166 int
167 tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
168 	regmatch_t pmatch[], int eflags);
169 
170 size_t
171 tre_regerror(int errcode, const regex_t *preg, char *errbuf,
172 	 size_t errbuf_size);
173 
174 void
175 tre_regfree(regex_t *preg);
176 
177 /* Wide character versions (not in POSIX.2). */
178 int
179 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);
180 
181 int
182 tre_regwexec(const regex_t *preg, const wchar_t *string,
183 	 size_t nmatch, regmatch_t pmatch[], int eflags);
184 
185 /* Versions with a maximum length argument and therefore the capability to
186    handle null characters in the middle of the strings (not in POSIX.2). */
187 int
188 tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
189 
190 int
191 tre_regnexec(const regex_t *preg, const char *string, size_t len,
192 	 size_t nmatch, regmatch_t pmatch[], int eflags);
193 
194 int
195 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);
196 
197 int
198 tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
199 	  size_t nmatch, regmatch_t pmatch[], int eflags);
200 
/*
 * Return the version string.  The returned string is static, so the
 * caller does not own it.
 */
char	*tre_version(void);
204 
/*
 * Return the value for a config parameter.  The type that `result' must
 * point to depends on the value of `query'; see the documentation for
 * more details.
 */
int	tre_config(int query, void *result);
210 
211 /* Returns 1 if the compiled pattern has back references, 0 if not. */
212 int
213 tre_have_backrefs(const regex_t *preg);
214 
215 /* Returns 1 if the compiled pattern uses approximate matching features,
216    0 if not. */
217 int
218 tre_have_approx(const regex_t *preg);
219 __END_DECLS
220 
221 /* The POSIX.2 regexp functions, locale version */
222 int
223 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t locale);
224 
225 int
226 tre_regncomp_l(regex_t *preg, const char *regex, size_t len, int cflags,
227     locale_t locale);
228 
229 int
230 tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags,
231     locale_t locale);
232 
233 int
234 tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t len, int cflags,
235     locale_t locale);
236 
237 #endif /* !_REGEX_H_ */
238