1 /* 2 tre-internal.h - TRE internal definitions 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifndef TRE_INTERNAL_H 10 #define TRE_INTERNAL_H 1 11 12 #ifdef HAVE_WCHAR_H 13 #include <wchar.h> 14 #endif /* HAVE_WCHAR_H */ 15 16 #ifdef HAVE_WCTYPE_H 17 #include <wctype.h> 18 #endif /* !HAVE_WCTYPE_H */ 19 20 #include <ctype.h> 21 #include <bitstring.h> 22 #include "tre.h" 23 #include "xlocale_private.h" 24 25 #ifdef TRE_DEBUG 26 #include <stdio.h> 27 #define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0) 28 #else /* !TRE_DEBUG */ 29 #define DPRINT(msg) do { } while(/*CONSTCOND*/0) 30 #endif /* !TRE_DEBUG */ 31 32 #define elementsof(x) ( sizeof(x) / sizeof(x[0]) ) 33 34 #ifdef HAVE_MBRTOWC 35 #define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps))) 36 #define tre_mbrtowc_l(pwc,s,n,ps,l) (mbrtowc_l((pwc), (s), (n), (ps), (l))) 37 #else /* !HAVE_MBRTOWC */ 38 #ifdef HAVE_MBTOWC 39 #define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n))) 40 #endif /* HAVE_MBTOWC */ 41 #endif /* !HAVE_MBRTOWC */ 42 43 #ifdef TRE_MULTIBYTE 44 #ifdef HAVE_MBSTATE_T 45 #define TRE_MBSTATE 46 #endif /* TRE_MULTIBYTE */ 47 #endif /* HAVE_MBSTATE_T */ 48 49 /* Define the character types and functions. */ 50 #ifdef TRE_WCHAR 51 52 /* Wide characters. */ 53 typedef wint_t tre_cint_t; 54 #define TRE_CHAR_MAX WCHAR_MAX 55 56 #ifdef TRE_MULTIBYTE 57 #define TRE_MB_CUR_MAX MB_CUR_MAX 58 #define TRE_MB_CUR_MAX_L MB_CUR_MAX_L 59 #else /* !TRE_MULTIBYTE */ 60 #define TRE_MB_CUR_MAX 1 61 #endif /* !TRE_MULTIBYTE */ 62 63 #define tre_isalnum iswalnum 64 #define tre_isalpha iswalpha 65 #ifdef HAVE_ISWBLANK 66 #define tre_isblank iswblank 67 #endif /* HAVE_ISWBLANK */ 68 #define tre_iscntrl iswcntrl 69 #define tre_isdigit iswdigit 70 #define tre_isgraph iswgraph 71 #define tre_islower iswlower 72 #define tre_isprint iswprint 73 #define tre_ispunct iswpunct 74 #define tre_isspace iswspace 75 #define tre_isupper iswupper 76 #define tre_isxdigit iswxdigit 77 78 #define tre_tolower towlower 79 #define tre_toupper towupper 80 #define tre_strlen wcslen 81 82 #define tre_isalnum_l iswalnum_l 83 #define tre_isdigit_l iswdigit_l 84 #define tre_islower_l iswlower_l 85 #define tre_isupper_l iswupper_l 86 #define tre_isxdigit_l iswxdigit_l 87 #define tre_tolower_l towlower_l 88 #define tre_toupper_l towupper_l 89 90 #else /* !TRE_WCHAR */ 91 92 /* 8 bit characters. */ 93 typedef short tre_cint_t; 94 #define TRE_CHAR_MAX 255 95 #define TRE_MB_CUR_MAX 1 96 97 #define tre_isalnum isalnum 98 #define tre_isalpha isalpha 99 #ifdef HAVE_ISASCII 100 #define tre_isascii isascii 101 #endif /* HAVE_ISASCII */ 102 #ifdef HAVE_ISBLANK 103 #define tre_isblank isblank 104 #endif /* HAVE_ISBLANK */ 105 #define tre_iscntrl iscntrl 106 #define tre_isdigit isdigit 107 #define tre_isgraph isgraph 108 #define tre_islower islower 109 #define tre_isprint isprint 110 #define tre_ispunct ispunct 111 #define tre_isspace isspace 112 #define tre_isupper isupper 113 #define tre_isxdigit isxdigit 114 115 #define tre_tolower(c) (tre_cint_t)(tolower(c)) 116 #define tre_toupper(c) (tre_cint_t)(toupper(c)) 117 #define tre_strlen(s) (strlen((const char*)s)) 118 119 #endif /* !TRE_WCHAR */ 120 121 #if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE) 122 #define TRE_USE_SYSTEM_WCTYPE 1 123 #endif 124 125 #ifdef TRE_USE_SYSTEM_WCTYPE 126 /* Use system provided iswctype() and wctype(). */ 127 typedef wctype_t tre_ctype_t; 128 #define tre_isctype iswctype 129 #define tre_ctype wctype 130 #define tre_isctype_l iswctype_l 131 #define tre_ctype_l wctype_l 132 #else /* !TRE_USE_SYSTEM_WCTYPE */ 133 /* Define our own versions of iswctype() and wctype(). */ 134 typedef int (*tre_ctype_t)(tre_cint_t); 135 #define tre_isctype(c, type) ( (type)(c) ) 136 tre_ctype_t tre_ctype(const char *name); 137 #endif /* !TRE_USE_SYSTEM_WCTYPE */ 138 139 typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t; 140 141 /* Returns number of bytes to add to (char *)ptr to make it 142 properly aligned for the type. */ 143 #define ALIGN(ptr, type) \ 144 ((((long)ptr) % sizeof(type)) \ 145 ? (sizeof(type) - (((long)ptr) % sizeof(type))) \ 146 : 0) 147 148 #undef MAX 149 #undef MIN 150 #define MAX(a, b) (((a) >= (b)) ? (a) : (b)) 151 #define MIN(a, b) (((a) <= (b)) ? (a) : (b)) 152 153 /* Define STRF to the correct printf formatter for strings. */ 154 #ifdef TRE_WCHAR 155 #define STRF "ls" 156 #else /* !TRE_WCHAR */ 157 #define STRF "s" 158 #endif /* !TRE_WCHAR */ 159 160 /* Types to handle bracket expressions. */ 161 typedef enum { 162 TRE_BRACKET_MATCH_TYPE_UNUSED = 0, 163 TRE_BRACKET_MATCH_TYPE_CHAR, /* Single character value */ 164 TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN, /* Collation range begin */ 165 TRE_BRACKET_MATCH_TYPE_RANGE_END, /* Collation range end */ 166 TRE_BRACKET_MATCH_TYPE_CLASS, /* Character class */ 167 TRE_BRACKET_MATCH_TYPE_EQUIVALENCE, /* Collation equivalence value */ 168 } tre_bracket_match_type_t; 169 170 typedef struct { 171 tre_bracket_match_type_t type; 172 tre_cint_t value; 173 } tre_bracket_match_t; 174 175 #define TRE_BRACKET_MATCH_FLAG_NEGATE 1 176 177 typedef struct { 178 int num_bracket_matches; 179 int flags; 180 tre_bracket_match_t bracket_matches[0]; 181 } tre_bracket_match_list_t; 182 183 #define SIZEOF_BRACKET_MATCH_LIST_N(n) (sizeof(tre_bracket_match_list_t) + \ 184 sizeof(tre_bracket_match_t) * (n)) 185 #define SIZEOF_BRACKET_MATCH_LIST(l) SIZEOF_BRACKET_MATCH_LIST_N( \ 186 (l)->num_bracket_matches) 187 188 /* The "count" field is the number of time the tag was set, initially zero. 189 The "first" field contains the first set value (when "count" equals 1). 190 The "value" field contains the current value of the tag, if "count" is 191 greater than zero (the tag's current value is -1 if "count" is zero). 192 The "touch" field is the touch value, a montonically increasing value 193 (maintained by the caller) set each time the tag itself is set. */ 194 typedef struct { 195 int count; 196 int first; 197 int value; 198 int touch; 199 } tre_tag_t; 200 201 /* TNFA transition type. A TNFA state is an array of transitions, 202 the terminator is a transition with NULL `state'. */ 203 typedef struct tnfa_transition tre_tnfa_transition_t; 204 205 struct tnfa_transition { 206 /* Range of accepted characters. */ 207 tre_cint_t code_min; 208 tre_cint_t code_max; 209 /* Pointer to the destination state. */ 210 tre_tnfa_transition_t *state; 211 /* ID number of the destination state. */ 212 int state_id; 213 /* -1 terminated array of tags (or NULL). */ 214 int *tags; 215 /* Matching parameters settings (or NULL). */ 216 int *params; 217 /* Assertion bitmap. */ 218 int assertions; 219 /* Assertion parameters. */ 220 union { 221 /* Bracket matches. */ 222 tre_bracket_match_list_t *bracket_match_list; 223 /* Back reference assertion. */ 224 int backref; 225 } u; 226 }; 227 228 229 /* Assertions. */ 230 #define ASSERT_AT_BOL 1 /* Beginning of line. */ 231 #define ASSERT_AT_EOL 2 /* End of line. */ 232 #define ASSERT_BRACKET_MATCH 4 /* Matches in `bracket_match_list'. */ 233 #define ASSERT_AT_BOW 8 /* Beginning of word. */ 234 #define ASSERT_AT_EOW 16 /* End of word. */ 235 #define ASSERT_AT_WB 32 /* Word boundary. */ 236 #define ASSERT_AT_WB_NEG 64 /* Not a word boundary. */ 237 #define ASSERT_BACKREF 128 /* A back reference in `backref'. */ 238 #define ASSERT_LAST 128 239 240 /* Tag directions. */ 241 typedef enum { 242 TRE_TAG_MINIMIZE = 0, 243 TRE_TAG_MAXIMIZE, 244 TRE_TAG_LEFT_MAXIMIZE, 245 } tre_tag_direction_t; 246 247 /* Parameters that can be changed dynamically while matching. */ 248 typedef enum { 249 TRE_PARAM_COST_INS = 0, 250 TRE_PARAM_COST_DEL = 1, 251 TRE_PARAM_COST_SUBST = 2, 252 TRE_PARAM_COST_MAX = 3, 253 TRE_PARAM_MAX_INS = 4, 254 TRE_PARAM_MAX_DEL = 5, 255 TRE_PARAM_MAX_SUBST = 6, 256 TRE_PARAM_MAX_ERR = 7, 257 TRE_PARAM_DEPTH = 8, 258 TRE_PARAM_LAST = 9 259 } tre_param_t; 260 261 /* Unset matching parameter */ 262 #define TRE_PARAM_UNSET -1 263 264 /* Signifies the default matching parameter value. */ 265 #define TRE_PARAM_DEFAULT -2 266 267 /* Instructions to compute submatch register values from tag values 268 after a successful match. */ 269 struct tre_submatch_data { 270 /* Tag that gives the value for rm_so (submatch start offset). */ 271 int so_tag; 272 /* Tag that gives the value for rm_eo (submatch end offset). */ 273 int eo_tag; 274 }; 275 276 typedef struct tre_submatch_data tre_submatch_data_t; 277 278 279 /* LAST MATCHED structures */ 280 struct __previous_type; 281 struct __previous_branch_type; 282 struct __current_type; 283 struct __current_branch_type; 284 285 typedef struct __previous_branch_type { 286 struct __previous_branch_type *next; 287 struct __previous_type *last_matched; int n_last_matched; int cmp_tag; int n_tags; 288 int tot_branches; 289 int tot_last_matched; 290 int tot_tags; 291 bitstr_t tags[0]; 292 } tre_last_matched_branch_pre_t; 293 294 typedef struct __previous_type { 295 struct __previous_type *next; 296 tre_last_matched_branch_pre_t *branches; int n_branches; int start_tag; 297 int tot_branches; 298 int tot_last_matched; 299 int tot_tags; 300 } tre_last_matched_pre_t; 301 302 typedef struct __current_branch_type { 303 int *tags; 304 struct __current_type *last_matched; int n_last_matched; int cmp_tag; int n_tags; 305 } tre_last_matched_branch_t; 306 307 typedef struct __current_type { 308 tre_last_matched_branch_t *branches; int n_branches; int start_tag; 309 } tre_last_matched_t; 310 311 /* TNFA definition. */ 312 typedef struct tnfa tre_tnfa_t; 313 314 struct tnfa { 315 tre_tnfa_transition_t *transitions; 316 tre_tnfa_transition_t *initial; 317 tre_tnfa_transition_t *final; 318 tre_submatch_data_t *submatch_data; 319 tre_tag_direction_t *tag_directions; 320 int *minimal_tags; 321 tre_last_matched_branch_t *last_matched_branch; 322 locale_t loc; 323 unsigned int num_transitions; 324 int first_char; 325 unsigned int num_submatches; 326 unsigned int num_submatches_invisible; 327 int num_tags; 328 int num_minimals; 329 int end_tag; 330 int num_states; 331 int cflags; 332 int have_backrefs; 333 int num_reorder_tags; 334 int have_approx; 335 int params_depth; 336 }; 337 338 int 339 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags, 340 locale_t loc); 341 342 void 343 tre_free(regex_t *preg); 344 345 reg_errcode_t 346 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 347 const tre_tnfa_t *tnfa, const tre_tag_t *tags, int match_eo); 348 349 reg_errcode_t 350 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len, 351 tre_str_type_t type, tre_tag_t *match_tags, int eflags, 352 int *match_end_ofs); 353 354 reg_errcode_t 355 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, 356 int len, tre_str_type_t type, tre_tag_t *match_tags, 357 int eflags, int *match_end_ofs); 358 359 #ifdef TRE_APPROX 360 reg_errcode_t 361 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len, 362 tre_str_type_t type, tre_tag_t *match_tags, 363 regamatch_t *match, regaparams_t params, 364 int eflags, int *match_end_ofs); 365 #endif /* TRE_APPROX */ 366 367 #endif /* TRE_INTERNAL_H */ 368 369 /* EOF */ 370