1 /* posix.h - Posix.2 compatibility functions
2  *
3  ****************************************************************
4  * Copyright (C) 1998, 2000 Thomas Lord
5  *
6  * See the file "COPYING" for further information about
7  * the copyright and warranty status of this work.
8  */
9 
10 
11 
12 #ifndef INCLUDE__RX_POSIX__POSIX_H
13 #define INCLUDE__RX_POSIX__POSIX_H
14 
15 
16 
17 #include "hackerlab/rx-posix/errnorx.h"
18 #include "hackerlab/rx-posix/dup-max.h"
19 #include "hackerlab/rx-posix/match-regexp.h"
20 
21 
22 
23 
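/* RE_DUP_MAX	(Required by Posix.2) An upper bound on the values
 * 		`m' and `n' in a regexp of the form `RE{m,n}'.
 *
 * Posix requires this to be declared in "<limits.h>".
 *
 * The correct value for Rx is redefined (as a macro) in
 * "hackerlab/rx-posix/limits.h".
 *
 * NOTE(review): this header includes "hackerlab/rx-posix/dup-max.h",
 * not "hackerlab/rx-posix/limits.h" -- confirm which header actually
 * carries the Rx definition.
 */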
31 
32 
/* regoff_t	(Required by Posix.2)
 *
 * Signed arithmetic type for offsets to substrings within a string
 * matched by `regexec'.  Posix.2 asks that it be able to hold the
 * largest value storable in either `off_t' or `ssize_t'.
 *
 * Caveat: `long' does not satisfy that requirement on a system where
 * `off_t' or `ssize_t' is `long long'.  That case was judged unlikely
 * when this declaration was written.
 */
typedef long int regoff_t;
44 
/* regmatch_t	(Required by Posix.2)
 *
 * Describes where one parenthesized subexpression matched.  It is an
 * alias for `struct rx_registers', which supplies the two fields
 * Posix.2 requires:
 *
 * 	regoff_t rm_so;		Byte offset, counted from the start of
 * 				the string, of the first character of
 * 				the substring.
 * 	regoff_t rm_eo;		Byte offset, counted from the start of
 * 				the string, of the first character
 * 				after the end of the substring.
 */
typedef struct rx_registers regmatch_t;
57 
58 /* regex_t	(Required by Posix.2) A compiled regexp, filled in
59  * 		by `regcomp'.
60  */
61 typedef struct rx_posix_regex
62 {
63   /* re_nsub	The number of parenthesized subexpressions.
64    * 		Filled in by `regcomp'.
65    *
66    * This field is required by Posix.2
67    *
68    */
69   size_t re_nsub;
70 
71 
72   /****************************************************************
73    * The remaining fields are implementation specific and are
74    * filled in by `regcomp'.
75    */
76 
77   /* pattern	The expression tree for the pattern.
78    * 		The reference count for the expression
79    * 		is incremented for `pattern'.
80    */
81   struct rx_exp_node * pattern;
82 
83   /* subexps	Pointers to the `re_nsub' parenthesized subexpressions
84    * 		of pattern.  The reference counts are NOT incremented
85    * 		for these references.
86    */
87   struct rx_exp_node ** subexps;
88 
89 
90   /* icase	non-0 iff regcomp was passed REG_ICASE
91    */
92   int icase;
93 
94   /* translate	0 or a 256 element mapping from characters to characters.
95    * 		Regcomp reads the pattern source string through this table
96    * 		and compiles a pattern that acts as if the target string
97    * 		is translated through this table.  Used to implement
98    * 		REG_ICASE.
99    */
100   t_uchar * translate;
101 
102   /* newline_anchor	If not 0, ^ matches after \n, $ before \n.
103    * 			Set by passing REG_NEWLINE to `regcomp'.
104    */
105   t_uchar newline_anchor;	/* If true, an anchor at a newline matches.*/
106 
107   /* is_left_anchored	If not 0, the pattern effectively begins with `^'.
108    *			For example, "(^abc)|(^def)" is anchored.
109    *
110    * is_right_anchored	Similarly for `$'.
111    */
112   int is_left_anchored;
113   int is_right_anchored;
114 
115   /* is_nullable	If not 0, the pattern can match the empty string.
116    */
117   t_uchar is_nullable;
118 
119   /* no_sub		If not 0, REG_NOSUB was passed to regcomp.
120    */
121   t_uchar no_sub;
122 
123 
124   /* fastmap		If `fastmap[c]' is 0, the pattern can not match
125    * 			a string that begins with character `c'.
126    */
127   t_uchar fastmap[256];
128 
129   /* small_p		If not 0, the pattern is "trivial" in such a way that
130    * 			the cost of pattern->NFA->DFA conversion should be
131    * 			avoided during matching in favor of matching based
132    * 			only on the expression tree for the pattern.
133    */
134   int small_p;
135 
136   /* owner_data		Arbitrary data, available to whoever called
137    * 			regcomp.   Do not rely on this feature.
138    */
139   void * owner_data;
140 } regex_t;
141 
142 
143 
/* enum rx_cflags
 *
 * Bit flags for the `cflags' argument of `regcomp'.  Combine them
 * with bitwise inclusive OR.
 *
 * Most of these values are required by Posix.2.  `REG_DFA_ONLY' is an
 * implementation-specific flag and should be avoided by programs
 * which are intended to be portable between implementations of Posix.
 *
 * Each flag is defined as the previous flag shifted left by one bit,
 * so the values are the distinct single bits 1, 2, 4, 8 and 16.
 */
enum rx_cflags
{
  /* REG_EXTENDED: if set, use extended regular expression (ERE)
   * syntax; if not set, use basic regular expression (BRE) syntax.
   * In extended syntax, none of the regexp operators are written
   * with a backslash.
   */
  REG_EXTENDED = 1,

  /* REG_ICASE: if set, ignore case when matching; if not set, case
   * is significant.
   */
  REG_ICASE = (REG_EXTENDED << 1),

  /* REG_NOSUB: report only success/failure in regexec. */
  REG_NOSUB = (REG_ICASE << 1),

  /* REG_NEWLINE: if set, "." and complemented character sets do not
   * match at newline characters in the string, and "^" and "$" do
   * match at newlines.
   *
   * If not set, anchors do not match at newlines and complemented
   * character sets ordinarily contain newline.
   */
  REG_NEWLINE = (REG_NOSUB << 1),

  /* REG_DFA_ONLY: if set, restrict the pattern language to patterns
   * that compile to efficient state machines.
   *
   * This is a non-standard feature.
   *
   * (No comma after the final enumerator: C89 and C++98 compilers
   * reject or warn about a trailing comma here.)
   */
  REG_DFA_ONLY = (REG_NEWLINE << 1)
};
185 
186 
/* enum rx_eflags
 *
 * Bit flags for the `eflags' argument of `regexec'.  Combine them
 * with bitwise inclusive OR.
 *
 * Flags required by Posix.2:
 *
 * 	REG_NOTBOL
 * 	REG_NOTEOL
 *
 * Implementation-specific (non-standard) flags:
 *
 * 	REG_NO_SUBEXP_REPORTING
 * 	REG_ALLOC_REGS
 *
 * Each flag is defined as the previous flag shifted left by one bit,
 * so the values are the distinct single bits 1, 2, 4 and 8.
 */
enum rx_eflags
{
  /* REG_NOTBOL (Required by Posix.2): if set, the beginning-of-line
   * operator `^' doesn't match the beginning of the input string
   * (presumably because it's not the beginning of a line).  If not
   * set, the beginning-of-line operator does match the beginning of
   * the string.
   */
  REG_NOTBOL = 1,

  /* REG_NOTEOL (Required by Posix.2): similar to REG_NOTBOL, except
   * that it applies to the end-of-line operator `$' and the end of
   * the input string.
   */
  REG_NOTEOL = (REG_NOTBOL << 1),

  /* REG_NO_SUBEXP_REPORTING (non-standard): causes `regexec' to fill
   * in only `pmatch[0]' and to ignore other elements of `pmatch'.
   * For some patterns (those which do not contain back-references
   * or anchors) this can speed up matching considerably.
   */
  REG_NO_SUBEXP_REPORTING = (REG_NOTEOL << 1),

  /* REG_ALLOC_REGS (non-standard): only used by `regnexec'.  It
   * causes `regnexec' to allocate storage for `regmatch_t' values.
   *
   * (No comma after the final enumerator: C89 and C++98 compilers
   * reject or warn about a trailing comma here.)
   */
  REG_ALLOC_REGS = (REG_NO_SUBEXP_REPORTING << 1)
};
237 
238 
239 
240 /* automatically generated __STDC__ prototypes */
241 extern int regcomp (regex_t * preg, const char * pattern, int cflags);
242 extern int regncomp (regex_t * preg,
243 		     const char * pattern,
244 		     size_t len,
245 		     int cflags);
246 extern void regfree (regex_t *preg);
247 extern size_t regerror (int errcode,
248 			const regex_t *preg,
249 			char *errbuf,
250 			size_t errbuf_size);
251 extern int regexec (const regex_t *preg,
252 		    const char *string,
253 		    size_t nmatch,
254 		    regmatch_t pmatch[],
255 		    int eflags);
256 extern int regnexec (const regex_t *preg,
257 		     const char *string,
258 		     regoff_t len,
259 		     size_t nmatch,
260 		     regmatch_t **pmatch,
261 		     int eflags);
262 #endif /* INCLUDE__RX_POSIX__POSIX_H */
263