1 /* 2 tre-internal.h - TRE internal definitions 3 4 Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>. 5 6 This library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Lesser General Public 8 License as published by the Free Software Foundation; either 9 version 2.1 of the License, or (at your option) any later version. 10 11 This library is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Lesser General Public License for more details. 15 16 You should have received a copy of the GNU Lesser General Public 17 License along with this library; if not, write to the Free Software 18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 20 */ 21 22 #ifndef TRE_INTERNAL_H 23 #define TRE_INTERNAL_H 1 24 25 #ifdef HAVE_WCHAR_H 26 #include <wchar.h> 27 #endif /* HAVE_WCHAR_H */ 28 29 #ifdef HAVE_WCTYPE_H 30 #include <wctype.h> 31 #endif /* !HAVE_WCTYPE_H */ 32 33 #include <ctype.h> 34 #include "regex.h" 35 36 #ifdef TRE_DEBUG 37 #include <stdio.h> 38 #define DPRINT(msg) do {printf msg; fflush(stdout);} while(0) 39 #else /* !TRE_DEBUG */ 40 #define DPRINT(msg) do { } while(0) 41 #endif /* !TRE_DEBUG */ 42 43 #define elementsof(x) ( sizeof(x) / sizeof(x[0]) ) 44 45 #ifdef HAVE_MBRTOWC 46 #define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps))) 47 #else /* !HAVE_MBRTOWC */ 48 #ifdef HAVE_MBTOWC 49 #define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n))) 50 #endif /* HAVE_MBTOWC */ 51 #endif /* !HAVE_MBRTOWC */ 52 53 #ifdef TRE_MULTIBYTE 54 #ifdef HAVE_MBSTATE_T 55 #define TRE_MBSTATE 56 #endif /* TRE_MULTIBYTE */ 57 #endif /* HAVE_MBSTATE_T */ 58 59 /* Define the character types and functions. */ 60 #ifdef TRE_WCHAR 61 62 /* Wide characters. */ 63 typedef wint_t tre_cint_t; 64 #define TRE_CHAR_MAX WCHAR_MAX 65 66 #ifdef TRE_MULTIBYTE 67 #define TRE_MB_CUR_MAX MB_CUR_MAX 68 #else /* !TRE_MULTIBYTE */ 69 #define TRE_MB_CUR_MAX 1 70 #endif /* !TRE_MULTIBYTE */ 71 72 #define tre_isalnum iswalnum 73 #define tre_isalpha iswalpha 74 #ifdef HAVE_ISWBLANK 75 #define tre_isblank iswblank 76 #endif /* HAVE_ISWBLANK */ 77 #define tre_iscntrl iswcntrl 78 #define tre_isdigit iswdigit 79 #define tre_isgraph iswgraph 80 #define tre_islower iswlower 81 #define tre_isprint iswprint 82 #define tre_ispunct iswpunct 83 #define tre_isspace iswspace 84 #define tre_isupper iswupper 85 #define tre_isxdigit iswxdigit 86 87 #define tre_tolower towlower 88 #define tre_toupper towupper 89 #define tre_strlen wcslen 90 91 #else /* !TRE_WCHAR */ 92 93 /* 8 bit characters. */ 94 typedef short tre_cint_t; 95 #define TRE_CHAR_MAX 255 96 #define TRE_MB_CUR_MAX 1 97 98 #define tre_isalnum isalnum 99 #define tre_isalpha isalpha 100 #ifdef HAVE_ISASCII 101 #define tre_isascii isascii 102 #endif /* HAVE_ISASCII */ 103 #ifdef HAVE_ISBLANK 104 #define tre_isblank isblank 105 #endif /* HAVE_ISBLANK */ 106 #define tre_iscntrl iscntrl 107 #define tre_isdigit isdigit 108 #define tre_isgraph isgraph 109 #define tre_islower islower 110 #define tre_isprint isprint 111 #define tre_ispunct ispunct 112 #define tre_isspace isspace 113 #define tre_isupper isupper 114 #define tre_isxdigit isxdigit 115 116 #define tre_tolower(c) (tre_cint_t)(tolower(c)) 117 #define tre_toupper(c) (tre_cint_t)(toupper(c)) 118 #define tre_strlen(s) (strlen((const char*)s)) 119 120 #endif /* !TRE_WCHAR */ 121 122 #if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE) 123 #define TRE_USE_SYSTEM_WCTYPE 1 124 #endif 125 126 #ifdef TRE_USE_SYSTEM_WCTYPE 127 /* Use system provided iswctype() and wctype(). */ 128 typedef wctype_t tre_ctype_t; 129 #define tre_isctype iswctype 130 #define tre_ctype wctype 131 #else /* !TRE_USE_SYSTEM_WCTYPE */ 132 /* Define our own versions of iswctype() and wctype(). */ 133 typedef int (*tre_ctype_t)(tre_cint_t); 134 #define tre_isctype(c, type) ( (type)(c) ) 135 tre_ctype_t tre_ctype(const char *name); 136 #endif /* !TRE_USE_SYSTEM_WCTYPE */ 137 138 typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t; 139 140 /* Returns number of bytes to add to (char *)ptr to make it 141 properly aligned for the type. */ 142 #define ALIGN(ptr, type) \ 143 ((((long)ptr) % sizeof(type)) \ 144 ? (sizeof(type) - (((long)ptr) % sizeof(type))) \ 145 : 0) 146 147 #undef MAX 148 #undef MIN 149 #define MAX(a, b) (((a) >= (b)) ? (a) : (b)) 150 #define MIN(a, b) (((a) <= (b)) ? (a) : (b)) 151 152 /* Define STRF to the correct printf formatter for strings. */ 153 #ifdef TRE_WCHAR 154 #define STRF "ls" 155 #else /* !TRE_WCHAR */ 156 #define STRF "s" 157 #endif /* !TRE_WCHAR */ 158 159 /* TNFA transition type. A TNFA state is an array of transitions, 160 the terminator is a transition with NULL `state'. */ 161 typedef struct tnfa_transition tre_tnfa_transition_t; 162 163 struct tnfa_transition { 164 /* Range of accepted characters. */ 165 tre_cint_t code_min; 166 tre_cint_t code_max; 167 /* Pointer to the destination state. */ 168 tre_tnfa_transition_t *state; 169 /* ID number of the destination state. */ 170 int state_id; 171 /* -1 terminated array of tags (or NULL). */ 172 int *tags; 173 /* Matching parameters settings (or NULL). */ 174 int *params; 175 /* Assertion bitmap. */ 176 int assertions; 177 /* Assertion parameters. */ 178 union { 179 /* Character class assertion. */ 180 tre_ctype_t class; 181 /* Back reference assertion. */ 182 int backref; 183 } u; 184 /* Negative character class assertions. */ 185 tre_ctype_t *neg_classes; 186 }; 187 188 189 /* Assertions. */ 190 #define ASSERT_AT_BOL 1 /* Beginning of line. */ 191 #define ASSERT_AT_EOL 2 /* End of line. */ 192 #define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */ 193 #define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */ 194 #define ASSERT_AT_BOW 16 /* Beginning of word. */ 195 #define ASSERT_AT_EOW 32 /* End of word. */ 196 #define ASSERT_AT_WB 64 /* Word boundary. */ 197 #define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */ 198 #define ASSERT_BACKREF 256 /* A back reference in `backref'. */ 199 #define ASSERT_LAST 256 200 201 /* Tag directions. */ 202 typedef enum { 203 TRE_TAG_MINIMIZE = 0, 204 TRE_TAG_MAXIMIZE = 1 205 } tre_tag_direction_t; 206 207 /* Parameters that can be changed dynamically while matching. */ 208 typedef enum { 209 TRE_PARAM_COST_INS = 0, 210 TRE_PARAM_COST_DEL = 1, 211 TRE_PARAM_COST_SUBST = 2, 212 TRE_PARAM_COST_MAX = 3, 213 TRE_PARAM_MAX_INS = 4, 214 TRE_PARAM_MAX_DEL = 5, 215 TRE_PARAM_MAX_SUBST = 6, 216 TRE_PARAM_MAX_ERR = 7, 217 TRE_PARAM_DEPTH = 8, 218 TRE_PARAM_LAST = 9 219 } tre_param_t; 220 221 /* Unset matching parameter */ 222 #define TRE_PARAM_UNSET -1 223 224 /* Signifies the default matching parameter value. */ 225 #define TRE_PARAM_DEFAULT -2 226 227 /* Instructions to compute submatch register values from tag values 228 after a successful match. */ 229 struct tre_submatch_data { 230 /* Tag that gives the value for rm_so (submatch start offset). */ 231 int so_tag; 232 /* Tag that gives the value for rm_eo (submatch end offset). */ 233 int eo_tag; 234 /* List of submatches this submatch is contained in. */ 235 int *parents; 236 }; 237 238 typedef struct tre_submatch_data tre_submatch_data_t; 239 240 241 /* TNFA definition. */ 242 typedef struct tnfa tre_tnfa_t; 243 244 struct tnfa { 245 tre_tnfa_transition_t *transitions; 246 unsigned int num_transitions; 247 tre_tnfa_transition_t *initial; 248 tre_tnfa_transition_t *final; 249 tre_submatch_data_t *submatch_data; 250 char *firstpos_chars; 251 int first_char; 252 unsigned int num_submatches; 253 tre_tag_direction_t *tag_directions; 254 int *minimal_tags; 255 int num_tags; 256 int num_minimals; 257 int end_tag; 258 int num_states; 259 int cflags; 260 int have_backrefs; 261 int have_approx; 262 int params_depth; 263 }; 264 265 int 266 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags); 267 268 void 269 tre_free(regex_t *preg); 270 271 void 272 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 273 const tre_tnfa_t *tnfa, int *tags, int match_eo); 274 275 reg_errcode_t 276 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len, 277 tre_str_type_t type, int *match_tags, int eflags, 278 int *match_end_ofs); 279 280 reg_errcode_t 281 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len, 282 tre_str_type_t type, int *match_tags, int eflags, 283 int *match_end_ofs); 284 285 reg_errcode_t 286 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, 287 int len, tre_str_type_t type, int *match_tags, 288 int eflags, int *match_end_ofs); 289 290 #ifdef TRE_APPROX 291 reg_errcode_t 292 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len, 293 tre_str_type_t type, int *match_tags, 294 regamatch_t *match, regaparams_t params, 295 int eflags, int *match_end_ofs); 296 #endif /* TRE_APPROX */ 297 298 #endif /* TRE_INTERNAL_H */ 299 300 /* EOF */ 301