1 /* match-regexp.h - low-level functions for comparing a string to a regexp 2 * 3 **************************************************************** 4 * Copyright (C) 1998, 2000 Thomas Lord 5 * 6 * See the file "COPYING" for further information about 7 * the copyright and warranty status of this work. 8 */ 9 10 11 #ifndef INCLUDE__RX_POSIX__MATCH_REGEXP_H 12 #define INCLUDE__RX_POSIX__MATCH_REGEXP_H 13 14 15 16 #include "hackerlab/machine/types.h" 17 #include "hackerlab/rx/tree.h" 18 19 20 21 /* rx_off_t An internal type used by the Posix interface as `regoff_t'. 22 * 23 * (`regoff_t' is required by Posix.2) Used to represent 24 * offsets to substrings within a string matched by 25 * `regexec'. `regoff_t' is a signed arithmetic type 26 * that can hold the largest value that can be stored in 27 * either `off_t' or `long'. 28 * 29 */ 30 typedef long rx_off_t; 31 32 struct rx_registers 33 { 34 rx_off_t rm_so; /* Byte offset from string's start to substring's start. */ 35 rx_off_t rm_eo; /* Byte offset from string's start to substring's end. */ 36 int final_tag; /* In register 0 of an array of registers, this field 37 * is set to the state label of the last superstate encountered 38 * during a match. 39 */ 40 }; 41 42 43 /* struct rx_context_rules 44 * 45 * An argument to `rx_basic_make_solutions' used to specify 46 * the behavior of `^', `$', and backreferences. 47 */ 48 struct rx_context_rules 49 { 50 t_uchar newline_anchor; /* If true, an anchor at a newline matches.*/ 51 t_uchar not_bol; /* If set, the anchors ('^' and '$') don't */ 52 t_uchar not_eol; /* match at the ends of the string. */ 53 t_uchar case_indep; 54 }; 55 56 /* struct rx_solutions; 57 * 58 * A lazilly computed stream of solutions for an expression or 59 * subexpression compared to a string. 60 */ 61 struct rx_solutions; 62 63 64 /************************************************************************ 65 *(paragraphs) 66 */ 67 68 /*(c rx_vmfn :category type) 69 * typedef int (*rx_vmfn) (void * closure, 70 * const t_uchar ** burst, 71 * rx_off_t * len, 72 * rx_off_t * offset, 73 * rx_off_t start, rx_off_t end, rx_off_t need); 74 * 75 * An `rx_vmfn' is passed to `rx_make_solutions' and used by 76 * `rx_next_solution' to access the input string being compared to a 77 * regexp. The purpose of this function is to permit the calling 78 * program to only keep part of the input string in memory, and to 79 * keep the input string in non-contiguous regions in memory. 80 * 81 * When called, `rx_vmfn' is passed: 82 * 83 * `closure' -- the opaque parameter passed to `rx_make_solutions'. 84 * 85 * `burst' -- an output parameter that will point to part of the input 86 * string. The pointer returned in this parameter must remain valid 87 * until the next call to `rx_vmfn' or `rx_contextfn' for the same 88 * call to `rx_next_solution'. 89 * 90 * `len' -- an output parameter; the length of the string returned in 91 * `*burst'. 92 * 93 * `offset' -- an output parameter; the position of `*burst' within 94 * the input string (e.g., 0 for the beginning of the input string, 95 * 9 if `*burst' is the tenth character of the input). 96 * 97 * `start' through `end' are the input positions requested by Rx. 98 * `need' is the input position that must be returned. `rx_vmfn' is 99 * permitted to return any substring of the input that contains 100 * `need', but the performance of Rx itself is best if returns a 101 * substring containing at least the entire range from `start' to 102 * `end'. The precise performance implications of a particular 103 * implementation of `rx_vmfn' are application specific. 104 * 105 * Note that Rx may access parts of the string out of order and may 106 * visit the same part of the string more than once. 107 * 108 * This function should return 0 on success, and some other value on 109 * error. 110 */ 111 typedef int (*rx_vmfn) (void * closure, 112 const t_uchar ** burst, 113 rx_off_t * len, 114 rx_off_t * offset, 115 rx_off_t start, rx_off_t end, rx_off_t need); 116 117 118 119 /*(c rx_contextfn :category type) 120 * typedef int (*rx_contextfn) (void * closure, 121 * struct rx_exp_node * node, 122 * rx_off_t start, rx_off_t end, 123 * struct rx_registers * regs); 124 * 125 * An `rx_contextfn' is passed to `rx_make_solutions' and used by 126 * `rx_next_solution' to access the input string being compared to a 127 * regexp. The purpose of this function is to permit the calling 128 * program to only keep part of the input string in memory, and to 129 * keep the input string in non-contiguous regions in memory. 130 * 131 * `rx_contextfn' is responsible for evaluating subexpressions 132 * which are anchors (`^' and `$') and subexpressions which are 133 * backreferences (e.g. `\1'). 134 * 135 * When called, `rx_contextfn' is passed: 136 * 137 * `closure' -- the opaque parameter passed to `rx_make_solutions'. 138 * 139 * `node' -- The regexp syntax tree node of the expression to match. 140 * 141 * `start' and `end' -- the positions within the input string (from 142 * `start' to `end-1') that must match `node'. 143 * 144 * `reg' -- subexpression position information for preceeding 145 * subexpressions. This is used for backreferences. Note that if a 146 * previous subexpression was not matched, its starting and ending 147 * positions will be recorded as -1. 148 * 149 * This function should return 1 if the subexpression matches, 0 150 * otherwise. 151 */ 152 typedef int (*rx_contextfn) (void * closure, 153 struct rx_exp_node * node, 154 rx_off_t start, rx_off_t end, 155 struct rx_registers * regs); 156 157 158 /* automatically generated __STDC__ prototypes */ 159 extern struct rx_solutions * rx_basic_make_solutions (struct rx_registers * regs, 160 struct rx_exp_node * expression, 161 struct rx_exp_node ** subexps, 162 int nsub, 163 rx_off_t start, 164 rx_off_t end, 165 struct rx_context_rules * rules, 166 const t_uchar * str, 167 int small_p); 168 extern void rx_basic_free_solutions (struct rx_solutions * solns); 169 extern int rx_next_solution (struct rx_solutions * solns); 170 extern int rx_solutions_final_tag (struct rx_solutions * solns); 171 extern struct rx_solutions * rx_make_solutions (struct rx_registers * regs, 172 int cset_size, 173 struct rx_exp_node * expression, 174 struct rx_exp_node ** subexps, 175 int nsub, 176 rx_off_t start, 177 rx_off_t end, 178 int interval_x, 179 rx_vmfn vmfn, 180 rx_contextfn contextfn, 181 void * closure, 182 int small_p, 183 int certainly_fits, 184 int certain_final_tag); 185 extern void * rx_solutions_closure (struct rx_solutions * solns); 186 extern void rx_free_solutions (struct rx_solutions * solns); 187 extern int rx_simplify_rexp (struct rx_exp_node ** answer, 188 int cset_size, 189 struct rx_exp_node * node, 190 struct rx_exp_node ** subexps); 191 extern int rx_analyze_rexp (struct rx_exp_node *** subexps, 192 size_t * re_nsub, 193 struct rx_exp_node * node); 194 #endif /* INCLUDE__RX_POSIX__MATCH_REGEXP_H */ 195