xref: /dragonfly/lib/libc/tre-regex/regex.h (revision 56380a7f)
1 /*
2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  */
29 
30 #ifndef _REGEX_H_
31 #define	_REGEX_H_
32 
33 #include <sys/cdefs.h>
34 #include <sys/types.h>
35 #include <wchar.h>
36 #include <xlocale.h>
37 
/*
 * Map the tre_-prefixed names used by the declarations in this header
 * onto the unprefixed regex names.  Each macro is a plain token
 * replacement, so a prototype written as tre_regcomp() declares
 * regcomp(), and so on.
 */

/* Byte-string compile / execute / error / free. */
#define	tre_regcomp		regcomp
#define	tre_regcomp_l		regcomp_l
#define	tre_regexec		regexec
#define	tre_regerror		regerror
#define	tre_regfree		regfree

/* Variants taking an explicit length. */
#define	tre_regncomp		regncomp
#define	tre_regncomp_l		regncomp_l
#define	tre_regnexec		regnexec

/* Wide-character variants. */
#define	tre_regwcomp		regwcomp
#define	tre_regwcomp_l		regwcomp_l
#define	tre_regwexec		regwexec

/* Wide-character variants taking an explicit length. */
#define	tre_regwncomp		regwncomp
#define	tre_regwncomp_l		regwncomp_l
#define	tre_regwnexec		regwnexec
53 
/*
 * Error codes of the regex API.  Every enumerator carries its numeric
 * value explicitly so the code-to-number mapping can be read directly.
 * REG_ENOSYS is only exposed for BSD or pre-2008 POSIX namespaces.
 */
typedef enum {
#if __BSD_VISIBLE || (__POSIX_VISIBLE && __POSIX_VISIBLE < 200809)
	REG_ENOSYS	= -1,	/* Reserved. */
#endif
	REG_OK		= 0,	/* No error. */
	REG_NOMATCH	= 1,	/* No match. */
	REG_BADPAT	= 2,	/* Invalid regexp. */
	REG_ECOLLATE	= 3,	/* Unknown collating element. */
	REG_ECTYPE	= 4,	/* Unknown character class name. */
	REG_EESCAPE	= 5,	/* Trailing backslash. */
	REG_ESUBREG	= 6,	/* Invalid back reference. */
	REG_EBRACK	= 7,	/* "[]" imbalance. */
	REG_EPAREN	= 8,	/* "\(\)" or "()" imbalance. */
	REG_EBRACE	= 9,	/* "\{\}" or "{}" imbalance. */
	REG_BADBR	= 10,	/* Invalid content of {}. */
	REG_ERANGE	= 11,	/* Invalid use of range operator. */
	REG_ESPACE	= 12,	/* Out of memory. */
	REG_BADRPT	= 13,	/* Invalid use of repetition operators. */
	REG_EMPTY	= 14,	/* Regexp was a zero-length string. */
	REG_INVARG	= 15,	/* Invalid argument to regex routine. */
	REG_ILLSEQ	= 16	/* Illegal byte sequence. */
} reg_errcode_t;
76 
/*
 * Configuration selectors, numbered consecutively from zero.
 * NOTE(review): presumably these are the `query' values accepted by
 * tre_config() -- confirm against the implementation.
 */
enum {
	TRE_CONFIG_APPROX	= 0,
	TRE_CONFIG_WCHAR	= 1,
	TRE_CONFIG_MULTIBYTE	= 2,
	TRE_CONFIG_SYSTEM_ABI	= 3,
	TRE_CONFIG_VERSION	= 4
};
84 
/* Offset type stored in regmatch_t. */
typedef int	regoff_t;

/* Character type handed to the tre_str_source get_next_char callback. */
typedef wchar_t	tre_char_t;

/*
 * Pattern object passed as `preg' to the compile, execute, error and
 * free functions declared in this header.
 */
typedef struct {
	int		 re_magic;
	size_t		 re_nsub;	/* Count of parenthesized subexpressions. */
	const void	*re_endp;	/* End pointer of the regex string (REG_PEND). */
	void		*value;		/* Internal use only. */
} regex_t;

/*
 * One element of the pmatch[] arrays taken by the execute functions.
 * Per POSIX, rm_so is the offset where the match starts and rm_eo the
 * offset just past its end.
 */
typedef struct {
	regoff_t	rm_so;
	regoff_t	rm_eo;
} regmatch_t;
99 
/*
 * Parameters for approximate matching: per-edit default costs with a
 * cost ceiling, followed by per-edit count limits with a total limit.
 */
typedef struct {
	/* Costs. */
	int	cost_ins;	/* Default cost of one inserted character. */
	int	cost_del;	/* Default cost of one deleted character. */
	int	cost_subst;	/* Default cost of one substituted character. */
	int	max_cost;	/* Highest total cost a match may have. */

	/* Error counts. */
	int	max_ins;	/* Highest number of inserts allowed. */
	int	max_del;	/* Highest number of deletes allowed. */
	int	max_subst;	/* Highest number of substitutes allowed. */
	int	max_err;	/* Highest number of errors allowed in total. */
} regaparams_t;
112 
113 /* Approximate matching result struct. */
114 typedef struct {
115   size_t nmatch;	/* Length of pmatch[] array. */
116   regmatch_t *pmatch;	/* Submatch data. */
117   int cost;		/* Cost of the match. */
118   int num_ins;		/* Number of inserts in the match. */
119   int num_del;		/* Number of deletes in the match. */
120   int num_subst;	/* Number of substitutes in the match. */
121 } regamatch_t;
122 
123 typedef struct {
124   int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
125   void (*rewind)(size_t pos, void *context);
126   int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
127   void *context;
128 } tre_str_source;
129 
/*
 * tre_regcomp() flags.  Each flag is an independent bit, written as
 * (1 << n) so its position is visible without following a chain of
 * definitions.
 */

/* POSIX flags. */
#define	REG_EXTENDED	(1 << 0)
#define	REG_ICASE	(1 << 1)
#define	REG_NEWLINE	(1 << 2)
#define	REG_NOSUB	(1 << 3)

/* Extra flags.  REG_BASIC is zero, i.e. it sets no bit. */
#define	REG_BASIC	0
#define	REG_LITERAL	(1 << 4)
#define	REG_RIGHT_ASSOC	(1 << 5)
#define	REG_UNGREEDY	(1 << 6)
#define	REG_PEND	(1 << 7)
#define	REG_ENHANCED	(1 << 8)

/* Alternative spellings of two of the extra flags. */
#define	REG_NOSPEC	REG_LITERAL
#define	REG_MINIMAL	REG_UNGREEDY
147 
/* tre_regexec() flags, one bit each, written as (1 << n). */
#define	REG_NOTBOL	(1 << 0)
#define	REG_NOTEOL	(1 << 1)
#define	REG_STARTEND	(1 << 2)
#define	REG_BACKR	(1 << 3)

/*
 * Extra tre_regexec() flags.
 * NOTE(review): these reuse bits 2 and 3, so REG_APPROX_MATCHER has the
 * same value as REG_STARTEND and REG_BACKTRACKING_MATCHER the same value
 * as REG_BACKR.  Confirm the overlap is intended before relying on the
 * flags being distinguishable.
 */
#define	REG_APPROX_MATCHER		(1 << 2)
#define	REG_BACKTRACKING_MATCHER	(1 << 3)

/* Upper limit on the iteration count in a bound expression. */
#define	RE_DUP_MAX	255

/*
 * NOTE(review): presumably advertises that the regn*() entry points are
 * available -- confirm with the consumers of this macro.
 */
#define	_REG_nexec	1
162 
163 __BEGIN_DECLS
164 
165 /* The POSIX.2 regexp functions */
166 int
167 tre_regcomp(regex_t * __restrict preg, const char * __restrict regex,
168     int cflags);
169 
170 int
171 tre_regexec(const regex_t * __restrict preg, const char * __restrict string,
172     size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags);
173 
174 size_t
175 tre_regerror(int errcode, const regex_t * __restrict preg,
176     char * __restrict errbuf, size_t errbuf_size);
177 
178 void
179 tre_regfree(regex_t *preg);
180 
181 /* Wide character versions (not in POSIX.2). */
182 int
183 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);
184 
185 int
186 tre_regwexec(const regex_t *preg, const wchar_t *string,
187 	 size_t nmatch, regmatch_t pmatch[], int eflags);
188 
189 /* Versions with a maximum length argument and therefore the capability to
190    handle null characters in the middle of the strings (not in POSIX.2). */
191 int
192 tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
193 
194 int
195 tre_regnexec(const regex_t *preg, const char *string, size_t len,
196 	 size_t nmatch, regmatch_t pmatch[], int eflags);
197 
198 int
199 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);
200 
201 int
202 tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
203 	  size_t nmatch, regmatch_t pmatch[], int eflags);
204 
205 /* Returns the version string.	The returned string is static. */
206 char *
207 tre_version(void);
208 
209 /* Returns the value for a config parameter.  The type to which `result'
210    must point to depends of the value of `query', see documentation for
211    more details. */
212 int
213 tre_config(int query, void *result);
214 
215 /* Returns 1 if the compiled pattern has back references, 0 if not. */
216 int
217 tre_have_backrefs(const regex_t *preg);
218 
219 /* Returns 1 if the compiled pattern uses approximate matching features,
220    0 if not. */
221 int
222 tre_have_approx(const regex_t *preg);
223 __END_DECLS
224 
225 /* The POSIX.2 regexp functions, locale version */
226 int
227 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t locale);
228 
229 int
230 tre_regncomp_l(regex_t *preg, const char *regex, size_t len, int cflags,
231     locale_t locale);
232 
233 int
234 tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags,
235     locale_t locale);
236 
237 int
238 tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t len, int cflags,
239     locale_t locale);
240 
241 #endif /* !_REGEX_H_ */
242