/*

Copyright (C) Jarkko Hietaniemi, 1998,1999,2000,2001,2002,2003.
All Rights Reserved.

This program is free software; you can redistribute it and/or modify
it under the terms of either:

a) the GNU Library General Public License as published by the Free
   Software Foundation; either version 2, or (at your option) any
   later version, or

b) the "Artistic License" which comes with Perl source code.

Other free software licensing schemes are negotiable.

Furthermore:

(1) This software is provided as-is, without warranties or
    obligations of any kind.

(2) You shall include this copyright notice intact in all copies
    and derived materials.

*/

/*

  $Id: apse.h,v 1.14 1998/12/15 12:42:04 jhi Exp $

*/

#ifndef APSE_H
#define APSE_H

/* Interface version of this header: 0.16. */
#define APSE_MAJOR_VERSION 0
#define APSE_MINOR_VERSION 16

#include <sys/types.h>

/*
 * Give every declaration below C linkage when this header is included
 * from a C++ translation unit, so the prototypes resolve against a
 * library compiled as C.  C compilers never see these guards.
 */
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Scalar types used throughout the interface.
 *
 * Each one can be overridden by defining the matching APSE_*_T macro
 * before this header is included.  The defaults are:
 *
 *   apse_vec_t    unsigned long   (bit vector word)
 *   apse_size_t   unsigned long   (sizes and positions)
 *   apse_ssize_t  long            (signed sizes and indices)
 *   apse_bool_t   int             (truth values)
 *
 * An override must be used consistently by every translation unit that
 * shares apse_t objects, because it changes the layout of struct apse_s.
 */
#ifdef APSE_VEC_T
typedef APSE_VEC_T    apse_vec_t;
#else
typedef unsigned long apse_vec_t;
#endif

#ifdef APSE_SIZE_T
typedef APSE_SIZE_T   apse_size_t;
#else
typedef unsigned long apse_size_t;
#endif

#ifdef APSE_SSIZE_T
typedef APSE_SSIZE_T  apse_ssize_t;
#else
typedef long          apse_ssize_t;
#endif

#ifdef APSE_BOOL_T
typedef APSE_BOOL_T   apse_bool_t;
#else
typedef int           apse_bool_t;
#endif

/*
 * Matcher object shared by every apse_* function.
 *
 * NOTE(review): the group headings below are inferred from the field
 * names only; the implementation file (not visible from this header) is
 * authoritative.  Accessor functions are declared further down for many
 * of these fields, so callers presumably should not need to touch the
 * members directly -- confirm before relying on the layout.
 */
typedef struct apse_s {
    /* Pattern. */
    apse_size_t    pattern_size;
    apse_vec_t*    pattern_mask;

    apse_vec_t*    case_mask;
    apse_vec_t*    fold_mask;

    /* Edit distance configuration. */
    apse_size_t    edit_distance;
    apse_bool_t    has_different_distances;
    apse_size_t    different_distances_max;
    apse_size_t    edit_insertions;
    apse_size_t    edit_deletions;
    apse_size_t    edit_substitutions;
    apse_bool_t    use_minimal_distance;

    /* State sizing. */
    apse_size_t    bitvectors_in_state;
    apse_size_t    bytes_in_state;
    apse_size_t    bytes_in_all_states;
    apse_size_t    largest_distance;

    /* Text and positions within it. */
    unsigned char* text;
    apse_size_t    text_size;
    apse_size_t    text_position;
    apse_size_t    text_initial_position;
    apse_size_t    text_final_position;
    apse_size_t    text_position_range;

    /* Matching state. */
    apse_vec_t*    state;
    apse_vec_t*    prev_state;
    apse_size_t    prev_equal;
    apse_size_t    prev_active;
    apse_size_t    match_begin_bitvector;
    apse_vec_t     match_begin_bitmask;
    apse_vec_t     match_begin_prefix;
    apse_size_t    match_end_bitvector;
    apse_vec_t     match_end_bitmask;
    apse_bool_t    match_state;
    apse_size_t    match_begin;
    apse_size_t    match_end;

    /*
     * Callbacks.  Each takes the matcher itself and returns void*.
     * When they are invoked and how the return value is used is decided
     * by the implementation.
     */
    void* (*match_bot_callback)  (struct apse_s *ap);
    void* (*match_begin_callback)(struct apse_s *ap);
    void* (*match_fail_callback) (struct apse_s *ap);
    void* (*match_end_callback)  (struct apse_s *ap);
    void* (*match_eot_callback)  (struct apse_s *ap);

    /* Exact-match bookkeeping. */
    apse_size_t    exact_positions;
    apse_vec_t*    exact_mask;

    apse_bool_t    is_greedy;

    /* Caller-supplied pointer and its size. */
    void*          custom_data;
    apse_size_t    custom_data_size;
} apse_t;

/*
 * Public API.
 *
 * Only the signatures are fixed by this header.  Return-value
 * conventions, ownership of the buffers passed in, and error behaviour
 * are defined by the implementation and must be checked there.
 */

/* Construction: build a matcher from a pattern buffer and an edit distance. */
apse_t *apse_create(unsigned char*  pattern,
                    apse_size_t     pattern_size,
                    apse_size_t     edit_distance);

/*
 * Matching entry points.  Each comes in a plain and a _next variant
 * taking the same arguments; the variants differ only in return type
 * and in the optional match_begin / match_size out-parameters.
 */
apse_bool_t apse_match(apse_t*          ap,
                       unsigned char*   text,
                       apse_size_t      text_size);
apse_bool_t apse_match_next(apse_t*         ap,
                            unsigned char*  text,
                            apse_size_t     text_size);

apse_ssize_t apse_index(apse_t*         ap,
                        unsigned char*  text,
                        apse_size_t     text_size);
apse_ssize_t apse_index_next(apse_t*        ap,
                             unsigned char* text,
                             apse_size_t    text_size);

apse_bool_t apse_slice(apse_t*          ap,
                       unsigned char*   text,
                       apse_size_t      text_size,
                       apse_size_t*     match_begin,
                       apse_size_t*     match_size);
apse_bool_t apse_slice_next(apse_t*         ap,
                            unsigned char*  text,
                            apse_size_t     text_size,
                            apse_size_t*    match_begin,
                            apse_size_t*    match_size);

void apse_reset(apse_t *ap);

/* Setters and getters for pattern, distance and text-position settings. */
apse_bool_t apse_set_pattern(apse_t*        ap,
                             unsigned char* pattern,
                             apse_size_t    pattern_size);
apse_bool_t apse_set_edit_distance(apse_t *ap, apse_size_t edit_distance);
apse_size_t apse_get_edit_distance(apse_t *ap);
apse_bool_t apse_set_minimal_distance(apse_t* ap, apse_bool_t minimal);
apse_bool_t apse_get_minimal_distance(apse_t* ap);
apse_bool_t apse_set_text_position(apse_t *ap,
                                   apse_size_t text_position);
apse_size_t apse_get_text_position(apse_t *ap);
apse_bool_t apse_set_text_initial_position(apse_t *ap,
                                           apse_size_t text_initial_position);
apse_size_t apse_get_text_initial_position(apse_t *ap);
apse_bool_t apse_set_text_final_position(apse_t *ap,
                                         apse_size_t text_final_position);
apse_size_t apse_get_text_final_position(apse_t *ap);
apse_bool_t apse_set_text_position_range(apse_t *ap,
                                         apse_size_t text_position_range);
apse_size_t apse_get_text_position_range(apse_t *ap);

/* Per-operation edit limits (insertions, deletions, substitutions). */
apse_bool_t apse_set_insertions(apse_t *ap, apse_size_t insertions);
apse_bool_t apse_set_deletions(apse_t *ap, apse_size_t deletions);
apse_bool_t apse_set_substitutions(apse_t *ap, apse_size_t substitutions);
apse_size_t apse_get_insertions(apse_t *ap);
apse_size_t apse_get_deletions(apse_t *ap);
apse_size_t apse_get_substitutions(apse_t *ap);

/*
 * Case-ignore flag for a slice of the pattern.  Begin and size are
 * signed (apse_ssize_t); what negative values mean is up to the
 * implementation.
 */
apse_bool_t apse_set_caseignore_slice(apse_t*       ap,
                                      apse_ssize_t  caseignore_begin,
                                      apse_ssize_t  caseignore_size,
                                      apse_bool_t   caseignore);

void apse_set_greedy(apse_t *ap, apse_bool_t greedy);
apse_bool_t apse_get_greedy(apse_t *ap);

/*
 * Callback setters.  Every callback has the signature
 *     void* callback(apse_t* ap);
 */
void apse_set_match_bot_callback(apse_t *ap,
                                 void* (*match_bot_callback)(apse_t* ap));
void apse_set_match_begin_callback(apse_t *ap,
                                   void* (*match_begin_callback)(apse_t* ap));
void apse_set_match_fail_callback(apse_t *ap,
                                  void* (*match_fail_callback)(apse_t* ap));
void apse_set_match_end_callback(apse_t *ap,
                                 void* (*match_end_callback)(apse_t* ap));
void apse_set_match_eot_callback(apse_t *ap,
                                 void* (*match_eot_callback)(apse_t* ap));

/*
 * Callback getters.  Each is a function taking the matcher and
 * returning a pointer to a function of type  void* (apse_t*).
 */
void* (*apse_get_match_bot_callback(apse_t *ap))(apse_t *ap);
void* (*apse_get_match_begin_callback(apse_t *ap))(apse_t *ap);
void* (*apse_get_match_fail_callback(apse_t *ap))(apse_t *ap);
void* (*apse_get_match_end_callback(apse_t *ap))(apse_t *ap);
void* (*apse_get_match_eot_callback(apse_t *ap))(apse_t *ap);

/* Per-position pattern modifiers; pattern_index is signed. */
apse_bool_t apse_set_anychar(apse_t *ap, apse_ssize_t pattern_index);

apse_bool_t apse_set_charset(apse_t*        ap,
                             apse_ssize_t   pattern_index,
                             unsigned char* set,
                             apse_size_t    set_size,
                             apse_bool_t    complement);

apse_bool_t apse_set_exact_slice(apse_t*        ap,
                                 apse_ssize_t   exact_begin,
                                 apse_ssize_t   exact_size,
                                 apse_bool_t    exact);

/* Caller-defined data attached to the matcher. */
void apse_set_custom_data(apse_t*       ap,
                          void*         custom_data,
                          apse_size_t   custom_data_size);
void* apse_get_custom_data(apse_t*      ap,
                           apse_size_t* custom_data_size);

void apse_destroy(apse_t *ap);

/*
 * (apse_size_t)-1, i.e. the largest apse_size_t value when that type is
 * unsigned (the default).  Presumably the "no valid position" sentinel;
 * confirm against the implementation.
 */
#define APSE_MATCH_BAD ((apse_size_t) -1)

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* #ifndef APSE_H */