xref: /netbsd/external/bsd/tre/dist/lib/tre-internal.h (revision 8c408d61)
1 /*
2   tre-internal.h - TRE internal definitions
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifndef TRE_INTERNAL_H
10 #define TRE_INTERNAL_H 1
11 
12 #ifdef HAVE_WCHAR_H
13 #include <wchar.h>
14 #endif /* HAVE_WCHAR_H */
15 
16 #ifdef HAVE_WCTYPE_H
17 #include <wctype.h>
#endif /* HAVE_WCTYPE_H */
19 
20 #include <ctype.h>
21 #include "tre.h"
22 
/* DPRINT((fmt, ...)) -- debug tracing.
   The single macro argument must be a complete, parenthesized printf()
   argument list, e.g. DPRINT(("pos = %d\n", pos)).  When TRE_DEBUG is
   not defined the macro expands to an empty statement and the argument
   is dropped without being evaluated. */
#ifndef TRE_DEBUG
#define DPRINT(msg) do { } while(/*CONSTCOND*/(void)0,0)
#else /* TRE_DEBUG */
#include <stdio.h>
#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/(void)0,0)
#endif /* TRE_DEBUG */
29 
/* Number of elements in the array `x'.  `x' must be a real array, not a
   pointer.  The argument is parenthesized before indexing so that
   expressions such as a cast followed by a dereference are measured as
   a whole. */
#define elementsof(x)	( sizeof(x) / sizeof((x)[0]) )
31 
/* tre_mbrtowc(pwc, s, n, ps): multibyte to wide character conversion.
   Maps to mbrtowc() when HAVE_MBRTOWC is defined; otherwise, when
   HAVE_MBTOWC is defined, maps to mbtowc() and the `ps' argument is
   ignored.  If neither is defined the macro is left undefined. */
#if defined(HAVE_MBRTOWC)
#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
#elif defined(HAVE_MBTOWC)
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
#endif /* HAVE_MBRTOWC || HAVE_MBTOWC */
39 
/* TRE_MBSTATE is defined only when both TRE_MULTIBYTE and
   HAVE_MBSTATE_T are defined. */
#ifdef TRE_MULTIBYTE
#ifdef HAVE_MBSTATE_T
#define TRE_MBSTATE
#endif /* HAVE_MBSTATE_T */
#endif /* TRE_MULTIBYTE */
45 
/* Define the character types and functions.

   Exactly one of the two branches below is compiled.  Both provide the
   same set of names (tre_cint_t, TRE_CHAR_MAX, TRE_MB_CUR_MAX, the
   tre_is*() classification macros, tre_tolower/tre_toupper and
   tre_strlen) so the rest of the library can be written once. */
#ifdef TRE_WCHAR

/* Wide characters. */
typedef wint_t tre_cint_t;
#define TRE_CHAR_MAX WCHAR_MAX

#ifdef TRE_MULTIBYTE
#define TRE_MB_CUR_MAX MB_CUR_MAX
#else /* !TRE_MULTIBYTE */
#define TRE_MB_CUR_MAX 1
#endif /* !TRE_MULTIBYTE */

#define tre_isalnum iswalnum
#define tre_isalpha iswalpha
/* tre_isblank is only available when the C library has iswblank(). */
#ifdef HAVE_ISWBLANK
#define tre_isblank iswblank
#endif /* HAVE_ISWBLANK */
#define tre_iscntrl iswcntrl
#define tre_isdigit iswdigit
#define tre_isgraph iswgraph
#define tre_islower iswlower
#define tre_isprint iswprint
#define tre_ispunct iswpunct
#define tre_isspace iswspace
#define tre_isupper iswupper
#define tre_isxdigit iswxdigit

#define tre_tolower towlower
#define tre_toupper towupper
#define tre_strlen  wcslen

#else /* !TRE_WCHAR */

/* 8 bit characters. */
typedef short tre_cint_t;
#define TRE_CHAR_MAX 255
#define TRE_MB_CUR_MAX 1

#define tre_isalnum isalnum
#define tre_isalpha isalpha
/* tre_isascii and tre_isblank are only available when the C library
   has isascii() and isblank() respectively. */
#ifdef HAVE_ISASCII
#define tre_isascii isascii
#endif /* HAVE_ISASCII */
#ifdef HAVE_ISBLANK
#define tre_isblank isblank
#endif /* HAVE_ISBLANK */
#define tre_iscntrl iscntrl
#define tre_isdigit isdigit
#define tre_isgraph isgraph
#define tre_islower islower
#define tre_isprint isprint
#define tre_ispunct ispunct
#define tre_isspace isspace
#define tre_isupper isupper
#define tre_isxdigit isxdigit

/* The whole expansion and the `s' argument are parenthesized so the
   macros behave like ordinary function calls inside larger
   expressions. */
#define tre_tolower(c) ((tre_cint_t)(tolower(c)))
#define tre_toupper(c) ((tre_cint_t)(toupper(c)))
#define tre_strlen(s)  (strlen((const char*)(s)))

#endif /* !TRE_WCHAR */
108 
/* The system character class functions are used only in wide character
   builds where both iswctype() and wctype() are available. */
#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
#define TRE_USE_SYSTEM_WCTYPE 1
#endif

#ifndef TRE_USE_SYSTEM_WCTYPE
/* Define our own versions of iswctype() and wctype(): a character
   class is a pointer to a predicate function, and testing a character
   against a class is a call through that pointer. */
typedef int (*tre_ctype_t)(tre_cint_t);
#define tre_isctype(c, type) ( (type)(c) )
tre_ctype_t tre_ctype(const char *name);
#else /* TRE_USE_SYSTEM_WCTYPE */
/* Use system provided iswctype() and wctype(). */
typedef wctype_t tre_ctype_t;
#define tre_isctype iswctype
#define tre_ctype   wctype
#endif /* TRE_USE_SYSTEM_WCTYPE */
124 
/* Tag describing how an input string is represented; it accompanies
   the `const void *string' argument of the matcher entry points.
   NOTE(review): the per-value meanings are inferred from the names --
   confirm against the matcher sources. */
typedef enum {
  STR_WIDE,
  STR_BYTE,
  STR_MBS,
  STR_USER
} tre_str_type_t;
126 
/* Returns number of bytes to add to (char *)ptr to make it
   properly aligned for the type.  Alignment is taken to be
   sizeof(type).

   `ptr' is parenthesized before the cast so that an expression such as
   `p + 1' is evaluated as a pointer first and converted afterwards.
   The address is converted to size_t rather than long: long is
   narrower than a pointer on some ABIs, and an unsigned type keeps the
   remainder non-negative.  `ptr' is evaluated more than once, so it
   must not have side effects. */
#define ALIGN(ptr, type) \
  ((((size_t)(ptr)) % sizeof(type)) \
   ? (sizeof(type) - (((size_t)(ptr)) % sizeof(type))) \
   : 0)
133 
/* Local MAX()/MIN(); any earlier definitions (e.g. from system
   headers) are discarded first.  When the operands compare equal the
   first one is the result.  Each operand may be evaluated twice, so
   do not pass expressions with side effects. */
#undef MIN
#undef MAX
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
138 
/* STRF is the printf conversion (without the leading `%') that prints
   a string of the library's character type: "s" for 8 bit builds,
   "ls" for wide character builds. */
#ifndef TRE_WCHAR
#define STRF "s"
#else /* TRE_WCHAR */
#define STRF "ls"
#endif /* TRE_WCHAR */
145 
146 /* TNFA transition type. A TNFA state is an array of transitions,
147    the terminator is a transition with NULL `state'. */
148 typedef struct tnfa_transition tre_tnfa_transition_t;
149 
150 struct tnfa_transition {
151   /* Range of accepted characters. */
152   tre_cint_t code_min;
153   tre_cint_t code_max;
154   /* Pointer to the destination state. */
155   tre_tnfa_transition_t *state;
156   /* ID number of the destination state. */
157   int state_id;
158   /* -1 terminated array of tags (or NULL). */
159   int *tags;
160   /* Matching parameters settings (or NULL). */
161   int *params;
162   /* Assertion bitmap. */
163   int assertions;
164   /* Assertion parameters. */
165   union {
166     /* Character class assertion. */
167     tre_ctype_t class;
168     /* Back reference assertion. */
169     int backref;
170   } u;
171   /* Negative character class assertions. */
172   tre_ctype_t *neg_classes;
173 };
174 
175 
/* Assertion flags.  Each flag is a distinct bit, so they can be OR'ed
   together in an assertion bitmap.  ASSERT_LAST equals the highest
   flag value. */
#define ASSERT_AT_BOL		(1 << 0)  /* Beginning of line. */
#define ASSERT_AT_EOL		(1 << 1)  /* End of line. */
#define ASSERT_CHAR_CLASS	(1 << 2)  /* Character class in `class'. */
#define ASSERT_CHAR_CLASS_NEG	(1 << 3)  /* Character classes in `neg_classes'. */
#define ASSERT_AT_BOW		(1 << 4)  /* Beginning of word. */
#define ASSERT_AT_EOW		(1 << 5)  /* End of word. */
#define ASSERT_AT_WB		(1 << 6)  /* Word boundary. */
#define ASSERT_AT_WB_NEG	(1 << 7)  /* Not a word boundary. */
#define ASSERT_BACKREF		(1 << 8)  /* A back reference in `backref'. */
#define ASSERT_LAST		(1 << 8)
187 
/* Tag directions: TRE_TAG_MINIMIZE is 0 and TRE_TAG_MAXIMIZE is 1. */
typedef enum {
  TRE_TAG_MINIMIZE,
  TRE_TAG_MAXIMIZE
} tre_tag_direction_t;
193 
/* Parameters that can be changed dynamically while matching.  The
   values are consecutive starting from zero; TRE_PARAM_LAST is one
   past the last real parameter. */
typedef enum {
  TRE_PARAM_COST_INS = 0,
  TRE_PARAM_COST_DEL,
  TRE_PARAM_COST_SUBST,
  TRE_PARAM_COST_MAX,
  TRE_PARAM_MAX_INS,
  TRE_PARAM_MAX_DEL,
  TRE_PARAM_MAX_SUBST,
  TRE_PARAM_MAX_ERR,
  TRE_PARAM_DEPTH,
  TRE_PARAM_LAST
} tre_param_t;
207 
/* Unset matching parameter.  Parenthesized so the negative constant
   stays a single operand wherever the macro is expanded. */
#define TRE_PARAM_UNSET (-1)

/* Signifies the default matching parameter value. */
#define TRE_PARAM_DEFAULT (-2)
213 
/* Describes how the registers of one submatch are computed from tag
   values once a match has succeeded. */
typedef struct tre_submatch_data {
  /* Tag holding the submatch start offset (rm_so). */
  int so_tag;
  /* Tag holding the submatch end offset (rm_eo). */
  int eo_tag;
  /* List of the submatches that contain this submatch. */
  int *parents;
} tre_submatch_data_t;
226 
227 
228 /* TNFA definition. */
229 typedef struct tnfa tre_tnfa_t;
230 
231 struct tnfa {
232   tre_tnfa_transition_t *transitions;
233   size_t num_transitions;
234   tre_tnfa_transition_t *initial;
235   tre_tnfa_transition_t *final;
236   tre_submatch_data_t *submatch_data;
237   char *firstpos_chars;
238   int first_char;
239   size_t num_submatches;
240   tre_tag_direction_t *tag_directions;
241   int *minimal_tags;
242   size_t num_tags;
243   size_t num_minimals;
244   int end_tag;
245   size_t num_states;
246   int cflags;
247   int have_backrefs;
248   int have_approx;
249   int params_depth;
250 };
251 
252 int
253 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);
254 
255 void
256 tre_free(regex_t *preg);
257 
258 void
259 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
260 		const tre_tnfa_t *tnfa, int *tags, int match_eo);
261 
262 reg_errcode_t
263 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
264 		      tre_str_type_t type, int *match_tags, int eflags,
265 		      int *match_end_ofs);
266 
267 reg_errcode_t
268 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
269 		      tre_str_type_t type, int *match_tags, int eflags,
270 		      int *match_end_ofs);
271 
272 reg_errcode_t
273 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
274 		       int len, tre_str_type_t type, int *match_tags,
275 		       int eflags, int *match_end_ofs);
276 
277 #ifdef TRE_APPROX
278 reg_errcode_t
279 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
280 		    tre_str_type_t type, int *match_tags,
281 		    regamatch_t *match, regaparams_t params,
282 		    int eflags, int *match_end_ofs);
283 #endif /* TRE_APPROX */
284 
285 #endif /* TRE_INTERNAL_H */
286 
287 /* EOF */
288