1 /* 2 * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 */ 29 30 #ifndef _REGEX_H_ 31 #define _REGEX_H_ 32 33 #include <sys/cdefs.h> 34 #include <sys/types.h> 35 #include <wchar.h> 36 #include <xlocale.h> 37 38 #define tre_regcomp regcomp 39 #define tre_regcomp_l regcomp_l 40 #define tre_regexec regexec 41 #define tre_regerror regerror 42 #define tre_regfree regfree 43 44 #define tre_regncomp regncomp 45 #define tre_regncomp_l regncomp_l 46 #define tre_regnexec regnexec 47 #define tre_regwcomp regwcomp 48 #define tre_regwcomp_l regwcomp_l 49 #define tre_regwexec regwexec 50 #define tre_regwncomp regwncomp 51 #define tre_regwncomp_l regwncomp_l 52 #define tre_regwnexec regwnexec 53 54 typedef enum { 55 #if __BSD_VISIBLE || __POSIX_VISIBLE <= 200112 56 REG_ENOSYS = -1, /* Reserved */ 57 #endif 58 REG_OK = 0, /* No error. */ 59 REG_NOMATCH, /* No match. */ 60 REG_BADPAT, /* Invalid regexp. */ 61 REG_ECOLLATE, /* Unknown collating element. */ 62 REG_ECTYPE, /* Unknown character class name. */ 63 REG_EESCAPE, /* Trailing backslash. */ 64 REG_ESUBREG, /* Invalid back reference. */ 65 REG_EBRACK, /* "[]" imbalance */ 66 REG_EPAREN, /* "\(\)" or "()" imbalance */ 67 REG_EBRACE, /* "\{\}" or "{}" imbalance */ 68 REG_BADBR, /* Invalid content of {} */ 69 REG_ERANGE, /* Invalid use of range operator */ 70 REG_ESPACE, /* Out of memory. */ 71 REG_BADRPT, /* Invalid use of repetition operators. */ 72 REG_EMPTY, /* rexexp was zero-length string */ 73 REG_INVARG, /* invalid argument to regex routine */ 74 REG_ILLSEQ /* illegal byte sequence */ 75 } reg_errcode_t; 76 77 enum { 78 TRE_CONFIG_APPROX, 79 TRE_CONFIG_WCHAR, 80 TRE_CONFIG_MULTIBYTE, 81 TRE_CONFIG_SYSTEM_ABI, 82 TRE_CONFIG_VERSION 83 }; 84 85 typedef int regoff_t; 86 typedef wchar_t tre_char_t; 87 88 typedef struct { 89 int re_magic; 90 size_t re_nsub; /* Number of parenthesized subexpressions. */ 91 const void *re_endp; /* regex string end pointer (REG_PEND) */ 92 void *value; /* For internal use only. */ 93 } regex_t; 94 95 typedef struct { 96 regoff_t rm_so; 97 regoff_t rm_eo; 98 } regmatch_t; 99 100 /* Approximate matching parameter struct. */ 101 typedef struct { 102 int cost_ins; /* Default cost of an inserted character. */ 103 int cost_del; /* Default cost of a deleted character. */ 104 int cost_subst; /* Default cost of a substituted character. */ 105 int max_cost; /* Maximum allowed cost of a match. */ 106 107 int max_ins; /* Maximum allowed number of inserts. */ 108 int max_del; /* Maximum allowed number of deletes. */ 109 int max_subst; /* Maximum allowed number of substitutes. */ 110 int max_err; /* Maximum allowed number of errors total. */ 111 } regaparams_t; 112 113 /* Approximate matching result struct. */ 114 typedef struct { 115 size_t nmatch; /* Length of pmatch[] array. */ 116 regmatch_t *pmatch; /* Submatch data. */ 117 int cost; /* Cost of the match. */ 118 int num_ins; /* Number of inserts in the match. */ 119 int num_del; /* Number of deletes in the match. */ 120 int num_subst; /* Number of substitutes in the match. */ 121 } regamatch_t; 122 123 typedef struct { 124 int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context); 125 void (*rewind)(size_t pos, void *context); 126 int (*compare)(size_t pos1, size_t pos2, size_t len, void *context); 127 void *context; 128 } tre_str_source; 129 130 /* POSIX tre_regcomp() flags. */ 131 #define REG_EXTENDED 1 132 #define REG_ICASE (REG_EXTENDED << 1) 133 #define REG_NEWLINE (REG_ICASE << 1) 134 #define REG_NOSUB (REG_NEWLINE << 1) 135 136 /* Extra tre_regcomp() flags. */ 137 #define REG_BASIC 0 138 #define REG_LITERAL (REG_NOSUB << 1) 139 #define REG_RIGHT_ASSOC (REG_LITERAL << 1) 140 #define REG_UNGREEDY (REG_RIGHT_ASSOC << 1) 141 #define REG_PEND (REG_UNGREEDY << 1) 142 #define REG_ENHANCED (REG_PEND << 1) 143 144 /* alias regcomp flags. */ 145 #define REG_NOSPEC REG_LITERAL 146 #define REG_MINIMAL REG_UNGREEDY 147 148 /* POSIX tre_regexec() flags. */ 149 #define REG_NOTBOL 1 150 #define REG_NOTEOL (REG_NOTBOL << 1) 151 #define REG_STARTEND (REG_NOTEOL << 1) 152 #define REG_BACKR (REG_STARTEND << 1) 153 154 /* Extra tre_regexec() flags. */ 155 #define REG_APPROX_MATCHER (REG_NOTEOL << 1) 156 #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) 157 158 /* The maximum number of iterations in a bound expression. */ 159 #define RE_DUP_MAX 255 160 161 #define _REG_nexec 1 162 163 __BEGIN_DECLS 164 165 /* The POSIX.2 regexp functions */ 166 int 167 tre_regcomp(regex_t *preg, const char *regex, int cflags); 168 169 int 170 tre_regexec(const regex_t *preg, const char *string, size_t nmatch, 171 regmatch_t pmatch[], int eflags); 172 173 size_t 174 tre_regerror(int errcode, const regex_t *preg, char *errbuf, 175 size_t errbuf_size); 176 177 void 178 tre_regfree(regex_t *preg); 179 180 /* Wide character versions (not in POSIX.2). */ 181 int 182 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags); 183 184 int 185 tre_regwexec(const regex_t *preg, const wchar_t *string, 186 size_t nmatch, regmatch_t pmatch[], int eflags); 187 188 /* Versions with a maximum length argument and therefore the capability to 189 handle null characters in the middle of the strings (not in POSIX.2). */ 190 int 191 tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags); 192 193 int 194 tre_regnexec(const regex_t *preg, const char *string, size_t len, 195 size_t nmatch, regmatch_t pmatch[], int eflags); 196 197 int 198 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags); 199 200 int 201 tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len, 202 size_t nmatch, regmatch_t pmatch[], int eflags); 203 204 /* Returns the version string. The returned string is static. */ 205 char * 206 tre_version(void); 207 208 /* Returns the value for a config parameter. The type to which `result' 209 must point to depends of the value of `query', see documentation for 210 more details. */ 211 int 212 tre_config(int query, void *result); 213 214 /* Returns 1 if the compiled pattern has back references, 0 if not. */ 215 int 216 tre_have_backrefs(const regex_t *preg); 217 218 /* Returns 1 if the compiled pattern uses approximate matching features, 219 0 if not. */ 220 int 221 tre_have_approx(const regex_t *preg); 222 __END_DECLS 223 224 /* The POSIX.2 regexp functions, locale version */ 225 int 226 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t locale); 227 228 int 229 tre_regncomp_l(regex_t *preg, const char *regex, size_t len, int cflags, 230 locale_t locale); 231 232 int 233 tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags, 234 locale_t locale); 235 236 int 237 tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t len, int cflags, 238 locale_t locale); 239 240 #endif /* !_REGEX_H_ */ 241