1 /*
2 
3 Copyright (C) Jarkko Hietaniemi, 1998,1999,2000,2001,2002,2003.
4 All Rights Reserved.
5 
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of either:
8 
9 a) the GNU Library General Public License as published by the Free
10    Software Foundation; either version 2, or (at your option) any
11    later version, or
12 
13 b) the "Artistic License" which comes with Perl source code.
14 
15 Other free software licensing schemes are negotiable.
16 
17 Furthermore:
18 
19 (1) This software is provided as-is, without warranties or
20     obligations of any kind.
21 
22 (2) You shall include this copyright notice intact in all copies
23     and derived materials.
24 
25 */
26 
27 /*
28 
29   $Id: apse.h,v 1.14 1998/12/15 12:42:04 jhi Exp $
30 
31 */
32 
33 #ifndef APSE_H
34 #define APSE_H
35 
/* Version of this header, split into a major and a minor component. */
#define APSE_MAJOR_VERSION 0
#define APSE_MINOR_VERSION 16
38 
39 #include <sys/types.h>
40 
/*
 * apse_vec_t: element type behind the mask and state pointers of apse_t.
 *
 * Define APSE_VEC_T to a type name before this header is included to
 * override the choice; without it, unsigned long is used.
 */
#if !defined(APSE_VEC_T)
typedef unsigned long apse_vec_t;
#else
typedef APSE_VEC_T apse_vec_t;
#endif
46 
/*
 * apse_size_t: type used for the sizes, counts and positions in this API.
 *
 * Define APSE_SIZE_T to a type name before this header is included to
 * override the choice; without it, unsigned long is used.
 */
#if !defined(APSE_SIZE_T)
typedef unsigned long apse_size_t;
#else
typedef APSE_SIZE_T apse_size_t;
#endif
52 
/*
 * apse_ssize_t: signed counterpart of apse_size_t, used for the index
 * return values and the slice begin/size arguments in this API.
 *
 * Define APSE_SSIZE_T to a type name before this header is included to
 * override the choice; without it, long is used.
 */
#if !defined(APSE_SSIZE_T)
typedef long apse_ssize_t;
#else
typedef APSE_SSIZE_T apse_ssize_t;
#endif
58 
/*
 * apse_bool_t: type used for the flags and boolean results in this API.
 *
 * Define APSE_BOOL_T to a type name before this header is included to
 * override the choice; without it, int is used.
 */
#if !defined(APSE_BOOL_T)
typedef int apse_bool_t;
#else
typedef APSE_BOOL_T apse_bool_t;
#endif
64 
65 typedef struct apse_s {
66     apse_size_t		pattern_size;
67     apse_vec_t*		pattern_mask;
68 
69     apse_vec_t*		case_mask;
70     apse_vec_t*		fold_mask;
71 
72     apse_size_t		edit_distance;
73     apse_bool_t		has_different_distances;
74     apse_size_t		different_distances_max;
75     apse_size_t		edit_insertions;
76     apse_size_t		edit_deletions;
77     apse_size_t		edit_substitutions;
78     apse_bool_t		use_minimal_distance;
79 
80     apse_size_t		bitvectors_in_state;
81     apse_size_t		bytes_in_state;
82     apse_size_t		bytes_in_all_states;
83     apse_size_t		largest_distance;
84 
85     unsigned char*	text;
86     apse_size_t		text_size;
87     apse_size_t		text_position;
88     apse_size_t		text_initial_position;
89     apse_size_t		text_final_position;
90     apse_size_t		text_position_range;
91 
92     apse_vec_t*		state;
93     apse_vec_t*		prev_state;
94     apse_size_t		prev_equal;
95     apse_size_t		prev_active;
96     apse_size_t		match_begin_bitvector;
97     apse_vec_t		match_begin_bitmask;
98     apse_vec_t		match_begin_prefix;
99     apse_size_t		match_end_bitvector;
100     apse_vec_t		match_end_bitmask;
101     apse_bool_t		match_state;
102     apse_size_t		match_begin;
103     apse_size_t		match_end;
104 
105     void*		(*match_bot_callback)  (struct apse_s *ap);
106     void*		(*match_begin_callback)(struct apse_s *ap);
107     void*		(*match_fail_callback) (struct apse_s *ap);
108     void*		(*match_end_callback)  (struct apse_s *ap);
109     void*		(*match_eot_callback)  (struct apse_s *ap);
110 
111     apse_size_t	exact_positions;
112     apse_vec_t*	exact_mask;
113 
114     apse_bool_t	is_greedy;
115 
116     void*	custom_data;
117     apse_size_t	custom_data_size;
118 } apse_t;
119 
120 apse_t *apse_create(unsigned char*	pattern,
121 		    apse_size_t		pattern_size,
122 		    apse_size_t		edit_distance);
123 
124 apse_bool_t apse_match(apse_t*		ap,
125 		       unsigned char*	text,
126 		       apse_size_t	text_size);
127 apse_bool_t apse_match_next(apse_t*		ap,
128 			    unsigned char*	text,
129 			    apse_size_t		text_size);
130 
131 apse_ssize_t apse_index(apse_t*		ap,
132 			unsigned char*	text,
133 			apse_size_t 	text_size);
134 apse_ssize_t apse_index_next(apse_t*		ap,
135 			     unsigned char*	text,
136 			     apse_size_t 	text_size);
137 
138 apse_bool_t apse_slice(apse_t*		ap,
139 		       unsigned char*	text,
140 		       apse_size_t	text_size,
141 		       apse_size_t*	match_begin,
142 		       apse_size_t*	match_size);
143 apse_bool_t apse_slice_next(apse_t*		ap,
144 			    unsigned char*	text,
145 			    apse_size_t		text_size,
146 			    apse_size_t*	match_begin,
147 			    apse_size_t*	match_size);
148 
149 void apse_reset(apse_t *ap);
150 
151 apse_bool_t apse_set_pattern(apse_t*		ap,
152 			     unsigned char*	pattern,
153 			     apse_size_t	pattern_size);
154 apse_bool_t apse_set_edit_distance(apse_t *ap, apse_size_t edit_distance);
155 apse_size_t apse_get_edit_distance(apse_t *ap);
156 apse_bool_t apse_set_minimal_distance(apse_t* ap, apse_bool_t minimal);
157 apse_bool_t apse_get_minimal_distance(apse_t* ap);
158 apse_bool_t apse_set_text_position(apse_t *ap,
159 				   apse_size_t text_position);
160 apse_size_t apse_get_text_position(apse_t *ap);
161 apse_bool_t apse_set_text_initial_position(apse_t *ap,
162 					   apse_size_t text_initial_position);
163 apse_size_t apse_get_text_initial_position(apse_t *ap);
164 apse_bool_t apse_set_text_final_position(apse_t *ap,
165 					   apse_size_t text_final_position);
166 apse_size_t apse_get_text_final_position(apse_t *ap);
167 apse_bool_t apse_set_text_position_range(apse_t *ap,
168 					   apse_size_t text_position_range);
169 apse_size_t apse_get_text_position_range(apse_t *ap);
170 
171 apse_bool_t apse_set_insertions(apse_t *ap, apse_size_t insertions);
172 apse_bool_t apse_set_deletions(apse_t *ap, apse_size_t deletions);
173 apse_bool_t apse_set_substitutions(apse_t *ap, apse_size_t substitutions);
174 apse_size_t apse_get_insertions(apse_t *ap);
175 apse_size_t apse_get_deletions(apse_t *ap);
176 apse_size_t apse_get_substitutions(apse_t *ap);
177 
178 apse_bool_t apse_set_caseignore_slice(apse_t*		ap,
179 				      apse_ssize_t	caseignore_begin,
180 				      apse_ssize_t	caseignore_size,
181 				      apse_bool_t	caseignore);
182 
183 void apse_set_greedy(apse_t *ap, apse_bool_t greedy);
184 apse_bool_t apse_get_greedy(apse_t *ap);
185 
186 void apse_set_match_bot_callback(apse_t *ap,
187 				 void* (*match_bot_callback)(apse_t* ap));
188 void apse_set_match_begin_callback(apse_t *ap,
189 				   void* (*match_begin_callback)(apse_t* ap));
190 void apse_set_match_fail_callback(apse_t *ap,
191 				  void* (*match_fail_callback)(apse_t* ap));
192 void apse_set_match_end_callback(apse_t *ap,
193 				 void* (*match_end_callback)(apse_t* ap));
194 void apse_set_match_eot_callback(apse_t *ap,
195 				 void* (*match_eot_callback)(apse_t* ap));
196 void* (*apse_get_match_bot_callback(apse_t *ap))(apse_t *ap);
197 void* (*apse_get_match_begin_callback(apse_t *ap))(apse_t *ap);
198 void* (*apse_get_match_fail_callback(apse_t *ap))(apse_t *ap);
199 void* (*apse_get_match_end_callback(apse_t *ap))(apse_t *ap);
200 void* (*apse_get_match_eot_callback(apse_t *ap))(apse_t *ap);
201 
202 apse_bool_t apse_set_anychar(apse_t *ap, apse_ssize_t pattern_index);
203 
204 apse_bool_t apse_set_charset(apse_t*		ap,
205 			     apse_ssize_t	pattern_index,
206 			     unsigned char*	set,
207 			     apse_size_t	set_size,
208 			     apse_bool_t	complement);
209 
210 apse_bool_t apse_set_exact_slice(apse_t*	ap,
211 				 apse_ssize_t	exact_begin,
212 				 apse_ssize_t	exact_size,
213 				 apse_bool_t	exact);
214 
215 void apse_set_custom_data(apse_t*	ap,
216 			  void*		custom_data,
217 			  apse_size_t	custom_data_size);
218 void* apse_get_custom_data(apse_t*	ap,
219 			   apse_size_t*	custom_data_size);
220 
221 void apse_destroy(apse_t *ap);
222 
/*
 * APSE_MATCH_BAD: the value -1 converted to apse_size_t (the largest value
 * of that type when it is unsigned, as it is by default).
 * NOTE(review): presumably used as a "no valid match position" sentinel --
 * confirm against the implementation.
 */
#define APSE_MATCH_BAD ((apse_size_t) -1)
224 
225 #endif /* #ifndef APSE_H */
226