xref: /dragonfly/lib/libc/tre-regex/regex.h (revision 0ca59c34)
1 /*
2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  */
29 
30 #ifndef _REGEX_H_
31 #define	_REGEX_H_
32 
33 #include <sys/cdefs.h>
34 #include <sys/types.h>
35 #include <wchar.h>
36 #include <xlocale.h>
37 
/*
 * Name mapping.  Each tre_-prefixed identifier is replaced by the
 * preprocessor with the unprefixed name on its right, so the prototypes
 * in this header that are spelled tre_regcomp(), tre_regexec(), ...
 * actually declare regcomp(), regexec(), ...
 */
#define tre_regcomp   regcomp
#define tre_regcomp_l regcomp_l
#define tre_regexec   regexec
#define tre_regerror  regerror
#define tre_regfree   regfree

/* Counted-length and wide-character entry points. */
#define tre_regncomp  regncomp
#define tre_regncomp_l regncomp_l
#define tre_regnexec  regnexec
#define tre_regwcomp  regwcomp
#define tre_regwcomp_l regwcomp_l
#define tre_regwexec  regwexec
#define tre_regwncomp regwncomp
#define tre_regwncomp_l regwncomp_l
#define tre_regwnexec regwnexec
53 
/*
 * Error codes.  REG_OK is 0; the enumerators that follow it take the
 * values 1, 2, 3, ... in declaration order.  REG_ENOSYS (-1) is only
 * declared when the visibility test below is true.
 */
typedef enum {
#if __BSD_VISIBLE || __POSIX_VISIBLE <= 200112
  REG_ENOSYS = -1,	/* Reserved */
#endif
  REG_OK = 0,		/* No error. */
  REG_NOMATCH,		/* No match. */
  REG_BADPAT,		/* Invalid regexp. */
  REG_ECOLLATE,		/* Unknown collating element. */
  REG_ECTYPE,		/* Unknown character class name. */
  REG_EESCAPE,		/* Trailing backslash. */
  REG_ESUBREG,		/* Invalid back reference. */
  REG_EBRACK,		/* "[]" imbalance */
  REG_EPAREN,		/* "\(\)" or "()" imbalance */
  REG_EBRACE,		/* "\{\}" or "{}" imbalance */
  REG_BADBR,		/* Invalid content of {} */
  REG_ERANGE,		/* Invalid use of range operator */
  REG_ESPACE,		/* Out of memory.  */
  REG_BADRPT,		/* Invalid use of repetition operators. */
  REG_EMPTY,		/* regexp was zero-length string */
  REG_INVARG,		/* invalid argument to regex routine */
  REG_ILLSEQ		/* illegal byte sequence */
} reg_errcode_t;
76 
/*
 * Configuration selectors, numbered 0 through 4 in declaration order.
 * NOTE(review): presumably these are the `query' values accepted by
 * tre_config() -- confirm against the implementation.
 */
enum {
  TRE_CONFIG_APPROX,
  TRE_CONFIG_WCHAR,
  TRE_CONFIG_MULTIBYTE,
  TRE_CONFIG_SYSTEM_ABI,
  TRE_CONFIG_VERSION
};
84 
/*
 * Offset type stored in regmatch_t.
 * NOTE(review): being a plain int, it cannot represent offsets above
 * INT_MAX; newer POSIX editions ask for a type at least as wide as
 * ptrdiff_t/ssize_t.  Widening it would change the ABI, so it is left
 * as is here.
 */
typedef int regoff_t;

/* Character type written by the tre_str_source get_next_char callback. */
typedef wchar_t tre_char_t;
87 
/* Compiled pattern object handed to the comp, exec, error and free routines. */
typedef struct {
  int re_magic;		/* NOTE(review): purpose not documented in this header. */
  size_t re_nsub;	/* Number of parenthesized subexpressions. */
  const void *re_endp;	/* regex string end pointer (REG_PEND) */
  void *value;		/* For internal use only. */
} regex_t;
94 
95 typedef struct {
96   regoff_t rm_so;
97   regoff_t rm_eo;
98 } regmatch_t;
99 
/*
 * Approximate matching parameter struct.
 * The first group sets per-operation costs and the total cost ceiling;
 * the second group caps the number of each kind of edit.
 */
typedef struct {
  int cost_ins;		/* Default cost of an inserted character. */
  int cost_del;		/* Default cost of a deleted character. */
  int cost_subst;	/* Default cost of a substituted character. */
  int max_cost;		/* Maximum allowed cost of a match. */

  int max_ins;		/* Maximum allowed number of inserts. */
  int max_del;		/* Maximum allowed number of deletes. */
  int max_subst;	/* Maximum allowed number of substitutes. */
  int max_err;		/* Maximum allowed number of errors total. */
} regaparams_t;
112 
113 /* Approximate matching result struct. */
114 typedef struct {
115   size_t nmatch;	/* Length of pmatch[] array. */
116   regmatch_t *pmatch;	/* Submatch data. */
117   int cost;		/* Cost of the match. */
118   int num_ins;		/* Number of inserts in the match. */
119   int num_del;		/* Number of deletes in the match. */
120   int num_subst;	/* Number of substitutes in the match. */
121 } regamatch_t;
122 
123 typedef struct {
124   int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
125   void (*rewind)(size_t pos, void *context);
126   int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
127   void *context;
128 } tre_str_source;
129 
/*
 * POSIX tre_regcomp() flags.
 * Each flag is the previous one shifted left by one bit, so the four
 * values are 1, 2, 4 and 8.
 */
#define REG_EXTENDED	1
#define REG_ICASE	(REG_EXTENDED << 1)
#define REG_NEWLINE	(REG_ICASE << 1)
#define REG_NOSUB	(REG_NEWLINE << 1)

/*
 * Extra tre_regcomp() flags.
 * REG_BASIC is 0 (no bit set); the others continue the shift chain
 * from REG_NOSUB and evaluate to 16, 32, 64, 128 and 256.
 */
#define REG_BASIC	0
#define REG_LITERAL	(REG_NOSUB << 1)
#define REG_RIGHT_ASSOC	(REG_LITERAL << 1)
#define REG_UNGREEDY	(REG_RIGHT_ASSOC << 1)
#define REG_PEND	(REG_UNGREEDY << 1)
#define REG_ENHANCED	(REG_PEND << 1)

/* alias regcomp flags. */
#define REG_NOSPEC	REG_LITERAL
#define REG_MINIMAL	REG_UNGREEDY
147 
/*
 * POSIX tre_regexec() flags.
 * Built as a shift chain from REG_NOTBOL: the values are 1, 2, 4, 8.
 */
#define REG_NOTBOL	1
#define REG_NOTEOL	(REG_NOTBOL << 1)
#define REG_STARTEND	(REG_NOTEOL << 1)
#define REG_BACKR	(REG_STARTEND << 1)

/*
 * Extra tre_regexec() flags.
 * NOTE(review): this chain also starts at (REG_NOTEOL << 1), so
 * REG_APPROX_MATCHER has the same value as REG_STARTEND (4) and
 * REG_BACKTRACKING_MATCHER the same value as REG_BACKR (8).  The two
 * pairs cannot be told apart in an eflags word.  The values are kept
 * unchanged here because altering them would change the ABI; confirm
 * whether the overlap is intended.
 */
#define REG_APPROX_MATCHER	 (REG_NOTEOL << 1)
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)

/* The maximum number of iterations in a bound expression. */
#define RE_DUP_MAX 255

/* NOTE(review): presumably advertises that regnexec() is declared -- confirm. */
#define _REG_nexec 1
162 
__BEGIN_DECLS

/*
 * The POSIX.2 regexp functions.
 * Because of the tre_* name-mapping macros earlier in this header, these
 * prototypes declare regcomp(), regexec(), regerror() and regfree().
 */
int
tre_regcomp(regex_t *preg, const char *regex, int cflags);

int
tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
    regmatch_t pmatch[], int eflags);

size_t
tre_regerror(int errcode, const regex_t *preg, char *errbuf,
    size_t errbuf_size);

void
tre_regfree(regex_t *preg);

/* Wide character versions (not in POSIX.2). */
int
tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);

int
tre_regwexec(const regex_t *preg, const wchar_t *string,
    size_t nmatch, regmatch_t pmatch[], int eflags);

/* Versions with a maximum length argument and therefore the capability to
   handle null characters in the middle of the strings (not in POSIX.2). */
int
tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);

int
tre_regnexec(const regex_t *preg, const char *string, size_t len,
    size_t nmatch, regmatch_t pmatch[], int eflags);

int
tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);

int
tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
    size_t nmatch, regmatch_t pmatch[], int eflags);

/* Returns the version string.  The returned string is static. */
char *
tre_version(void);

/* Returns the value for a config parameter.  The type to which `result'
   must point depends on the value of `query', see documentation for
   more details. */
int
tre_config(int query, void *result);

/* Returns 1 if the compiled pattern has back references, 0 if not. */
int
tre_have_backrefs(const regex_t *preg);

/* Returns 1 if the compiled pattern uses approximate matching features,
   0 if not. */
int
tre_have_approx(const regex_t *preg);
222 __END_DECLS
223 
224 /* The POSIX.2 regexp functions, locale version */
225 int
226 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t locale);
227 
228 int
229 tre_regncomp_l(regex_t *preg, const char *regex, size_t len, int cflags,
230     locale_t locale);
231 
232 int
233 tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags,
234     locale_t locale);
235 
236 int
237 tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t len, int cflags,
238     locale_t locale);
239 
240 #endif /* !_REGEX_H_ */
241