1 /* 2 tre-internal.h - TRE internal definitions 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifndef TRE_INTERNAL_H 10 #define TRE_INTERNAL_H 1 11 12 #ifdef HAVE_WCHAR_H 13 #include <wchar.h> 14 #endif /* HAVE_WCHAR_H */ 15 16 #ifdef HAVE_WCTYPE_H 17 #include <wctype.h> 18 #endif /* !HAVE_WCTYPE_H */ 19 20 #include <ctype.h> 21 #include "tre.h" 22 23 #ifdef TRE_DEBUG 24 #include <stdio.h> 25 #define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/(void)0,0) 26 #else /* !TRE_DEBUG */ 27 #define DPRINT(msg) do { } while(/*CONSTCOND*/(void)0,0) 28 #endif /* !TRE_DEBUG */ 29 30 #define elementsof(x) ( sizeof(x) / sizeof(x[0]) ) 31 32 #ifdef HAVE_MBRTOWC 33 #define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps))) 34 #else /* !HAVE_MBRTOWC */ 35 #ifdef HAVE_MBTOWC 36 #define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n))) 37 #endif /* HAVE_MBTOWC */ 38 #endif /* !HAVE_MBRTOWC */ 39 40 #ifdef TRE_MULTIBYTE 41 #ifdef HAVE_MBSTATE_T 42 #define TRE_MBSTATE 43 #endif /* TRE_MULTIBYTE */ 44 #endif /* HAVE_MBSTATE_T */ 45 46 /* Define the character types and functions. */ 47 #ifdef TRE_WCHAR 48 49 /* Wide characters. */ 50 typedef wint_t tre_cint_t; 51 #define TRE_CHAR_MAX WCHAR_MAX 52 53 #ifdef TRE_MULTIBYTE 54 #define TRE_MB_CUR_MAX MB_CUR_MAX 55 #else /* !TRE_MULTIBYTE */ 56 #define TRE_MB_CUR_MAX 1 57 #endif /* !TRE_MULTIBYTE */ 58 59 #define tre_isalnum iswalnum 60 #define tre_isalpha iswalpha 61 #ifdef HAVE_ISWBLANK 62 #define tre_isblank iswblank 63 #endif /* HAVE_ISWBLANK */ 64 #define tre_iscntrl iswcntrl 65 #define tre_isdigit iswdigit 66 #define tre_isgraph iswgraph 67 #define tre_islower iswlower 68 #define tre_isprint iswprint 69 #define tre_ispunct iswpunct 70 #define tre_isspace iswspace 71 #define tre_isupper iswupper 72 #define tre_isxdigit iswxdigit 73 74 #define tre_tolower towlower 75 #define tre_toupper towupper 76 #define tre_strlen wcslen 77 78 #else /* !TRE_WCHAR */ 79 80 /* 8 bit characters. */ 81 typedef short tre_cint_t; 82 #define TRE_CHAR_MAX 255 83 #define TRE_MB_CUR_MAX 1 84 85 #define tre_isalnum isalnum 86 #define tre_isalpha isalpha 87 #ifdef HAVE_ISASCII 88 #define tre_isascii isascii 89 #endif /* HAVE_ISASCII */ 90 #ifdef HAVE_ISBLANK 91 #define tre_isblank isblank 92 #endif /* HAVE_ISBLANK */ 93 #define tre_iscntrl iscntrl 94 #define tre_isdigit isdigit 95 #define tre_isgraph isgraph 96 #define tre_islower islower 97 #define tre_isprint isprint 98 #define tre_ispunct ispunct 99 #define tre_isspace isspace 100 #define tre_isupper isupper 101 #define tre_isxdigit isxdigit 102 103 #define tre_tolower(c) (tre_cint_t)(tolower(c)) 104 #define tre_toupper(c) (tre_cint_t)(toupper(c)) 105 #define tre_strlen(s) (strlen((const char*)s)) 106 107 #endif /* !TRE_WCHAR */ 108 109 #if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE) 110 #define TRE_USE_SYSTEM_WCTYPE 1 111 #endif 112 113 #ifdef TRE_USE_SYSTEM_WCTYPE 114 /* Use system provided iswctype() and wctype(). */ 115 typedef wctype_t tre_ctype_t; 116 #define tre_isctype iswctype 117 #define tre_ctype wctype 118 #else /* !TRE_USE_SYSTEM_WCTYPE */ 119 /* Define our own versions of iswctype() and wctype(). */ 120 typedef int (*tre_ctype_t)(tre_cint_t); 121 #define tre_isctype(c, type) ( (type)(c) ) 122 tre_ctype_t tre_ctype(const char *name); 123 #endif /* !TRE_USE_SYSTEM_WCTYPE */ 124 125 typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t; 126 127 /* Returns number of bytes to add to (char *)ptr to make it 128 properly aligned for the type. */ 129 #define ALIGN(ptr, type) \ 130 ((((long)ptr) % sizeof(type)) \ 131 ? (sizeof(type) - (((long)ptr) % sizeof(type))) \ 132 : 0) 133 134 #undef MAX 135 #undef MIN 136 #define MAX(a, b) (((a) >= (b)) ? (a) : (b)) 137 #define MIN(a, b) (((a) <= (b)) ? (a) : (b)) 138 139 /* Define STRF to the correct printf formatter for strings. */ 140 #ifdef TRE_WCHAR 141 #define STRF "ls" 142 #else /* !TRE_WCHAR */ 143 #define STRF "s" 144 #endif /* !TRE_WCHAR */ 145 146 /* TNFA transition type. A TNFA state is an array of transitions, 147 the terminator is a transition with NULL `state'. */ 148 typedef struct tnfa_transition tre_tnfa_transition_t; 149 150 struct tnfa_transition { 151 /* Range of accepted characters. */ 152 tre_cint_t code_min; 153 tre_cint_t code_max; 154 /* Pointer to the destination state. */ 155 tre_tnfa_transition_t *state; 156 /* ID number of the destination state. */ 157 int state_id; 158 /* -1 terminated array of tags (or NULL). */ 159 int *tags; 160 /* Matching parameters settings (or NULL). */ 161 int *params; 162 /* Assertion bitmap. */ 163 int assertions; 164 /* Assertion parameters. */ 165 union { 166 /* Character class assertion. */ 167 tre_ctype_t class; 168 /* Back reference assertion. */ 169 int backref; 170 } u; 171 /* Negative character class assertions. */ 172 tre_ctype_t *neg_classes; 173 }; 174 175 176 /* Assertions. */ 177 #define ASSERT_AT_BOL 1 /* Beginning of line. */ 178 #define ASSERT_AT_EOL 2 /* End of line. */ 179 #define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */ 180 #define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */ 181 #define ASSERT_AT_BOW 16 /* Beginning of word. */ 182 #define ASSERT_AT_EOW 32 /* End of word. */ 183 #define ASSERT_AT_WB 64 /* Word boundary. */ 184 #define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */ 185 #define ASSERT_BACKREF 256 /* A back reference in `backref'. */ 186 #define ASSERT_LAST 256 187 188 /* Tag directions. */ 189 typedef enum { 190 TRE_TAG_MINIMIZE = 0, 191 TRE_TAG_MAXIMIZE = 1 192 } tre_tag_direction_t; 193 194 /* Parameters that can be changed dynamically while matching. */ 195 typedef enum { 196 TRE_PARAM_COST_INS = 0, 197 TRE_PARAM_COST_DEL = 1, 198 TRE_PARAM_COST_SUBST = 2, 199 TRE_PARAM_COST_MAX = 3, 200 TRE_PARAM_MAX_INS = 4, 201 TRE_PARAM_MAX_DEL = 5, 202 TRE_PARAM_MAX_SUBST = 6, 203 TRE_PARAM_MAX_ERR = 7, 204 TRE_PARAM_DEPTH = 8, 205 TRE_PARAM_LAST = 9 206 } tre_param_t; 207 208 /* Unset matching parameter */ 209 #define TRE_PARAM_UNSET -1 210 211 /* Signifies the default matching parameter value. */ 212 #define TRE_PARAM_DEFAULT -2 213 214 /* Instructions to compute submatch register values from tag values 215 after a successful match. */ 216 struct tre_submatch_data { 217 /* Tag that gives the value for rm_so (submatch start offset). */ 218 int so_tag; 219 /* Tag that gives the value for rm_eo (submatch end offset). */ 220 int eo_tag; 221 /* List of submatches this submatch is contained in. */ 222 int *parents; 223 }; 224 225 typedef struct tre_submatch_data tre_submatch_data_t; 226 227 228 /* TNFA definition. */ 229 typedef struct tnfa tre_tnfa_t; 230 231 struct tnfa { 232 tre_tnfa_transition_t *transitions; 233 size_t num_transitions; 234 tre_tnfa_transition_t *initial; 235 tre_tnfa_transition_t *final; 236 tre_submatch_data_t *submatch_data; 237 char *firstpos_chars; 238 int first_char; 239 size_t num_submatches; 240 tre_tag_direction_t *tag_directions; 241 int *minimal_tags; 242 size_t num_tags; 243 size_t num_minimals; 244 int end_tag; 245 size_t num_states; 246 int cflags; 247 int have_backrefs; 248 int have_approx; 249 int params_depth; 250 }; 251 252 int 253 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags); 254 255 void 256 tre_free(regex_t *preg); 257 258 void 259 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 260 const tre_tnfa_t *tnfa, int *tags, int match_eo); 261 262 reg_errcode_t 263 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len, 264 tre_str_type_t type, int *match_tags, int eflags, 265 int *match_end_ofs); 266 267 reg_errcode_t 268 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len, 269 tre_str_type_t type, int *match_tags, int eflags, 270 int *match_end_ofs); 271 272 reg_errcode_t 273 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, 274 int len, tre_str_type_t type, int *match_tags, 275 int eflags, int *match_end_ofs); 276 277 #ifdef TRE_APPROX 278 reg_errcode_t 279 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len, 280 tre_str_type_t type, int *match_tags, 281 regamatch_t *match, regaparams_t params, 282 int eflags, int *match_end_ofs); 283 #endif /* TRE_APPROX */ 284 285 #endif /* TRE_INTERNAL_H */ 286 287 /* EOF */ 288