1 #ifndef BSDCONV_H
2 #define BSDCONV_H
3
4 #include <stdint.h>
5 #include <unistd.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #ifdef WIN32
9 #include <windows.h>
10 #else
11 #include <dlfcn.h>
12 #endif
13
14 #if defined(__linux__)
15 #ifndef _BSD_SOURCE
16 #define _BSD_SOURCE
17 #endif
18 #include <endian.h>
19 #elif defined(__APPLE__)
20 // https://github.com/cetic/foren6-analyzer/blob/43ad3c4cea2b6d89800a987a270e1018c0238ae6/src/apple-endian.h
21 #include <libkern/OSByteOrder.h>
22 #define htobe16(x) OSSwapHostToBigInt16(x)
23 #define htole16(x) OSSwapHostToLittleInt16(x)
24 #define be16toh(x) OSSwapBigToHostInt16(x)
25 #define le16toh(x) OSSwapLittleToHostInt16(x)
26 #define htobe32(x) OSSwapHostToBigInt32(x)
27 #define htole32(x) OSSwapHostToLittleInt32(x)
28 #define be32toh(x) OSSwapBigToHostInt32(x)
29 #define le32toh(x) OSSwapLittleToHostInt32(x)
30 #define htobe64(x) OSSwapHostToBigInt64(x)
31 #define htole64(x) OSSwapHostToLittleInt64(x)
32 #define be64toh(x) OSSwapBigToHostInt64(x)
33 #define le64toh(x) OSSwapLittleToHostInt64(x)
34 #else
35 #include <sys/endian.h>
36 #endif
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41
// Bit values for struct data_rt.flags
#define F_FREE 0x01	/* DATUM_FREE / DATA_FREE call free() on ->data when this bit is set */
#define F_MARK 0x02

// Bit values for struct bsdconv_phase.flags
#define F_MATCH    0x01
#define F_PENDING  0x02
#define F_LOOPBACK 0x04
50
/*
 * Encoded integers are little-endian.
 * en_*: host order -> encoded form; de_*: encoded form -> host order.
 */
#define en_offset(X) htole32(X)
#define en_uint16(X) htole16(X)
#define de_offset(X) le32toh(X)
#define de_uint16(X) le16toh(X)

/* 32-bit offset as handled by en_offset()/de_offset() */
typedef uint32_t offset_t;
/* value type of the named counters (struct bsdconv_counter_entry.val) */
typedef size_t bsdconv_counter_t;
57
/* Phase kinds. The numeric values are the implicit 0..5 of the original order. */
enum bsdconv_phase_type {
	_INPUT = 0,
	FROM   = 1,
	INTER  = 2,
	TO     = 3,
	FILTER = 4,	//for convenient use in module functions
	SCORER = 5,	//for convenient use in module functions
};
66
67 #ifdef _BSDCONV_INTERNAL
/* Status codes (internal builds only). Values are 0..8 in declaration order. */
enum bsdconv_status {
	CONTINUE            = 0,
	DEADEND             = 1,
	MATCH               = 2,
	SUBMATCH            = 3,
	SUBROUTINE          = 4,
	SUBMATCH_SUBROUTINE = 5,
	NEXTPHASE           = 6,
	NOMATCH             = 7,
	NOOP                = 8,
};
79 #endif
80
/* Output modes. Values are 0..6 in declaration order. */
enum bsdconv_output_mode {
	BSDCONV_HOLD        = 0,
	BSDCONV_AUTOMALLOC  = 1,
	BSDCONV_PREMALLOCED = 2,
	BSDCONV_FILE        = 3,
	BSDCONV_FD          = 4,
	BSDCONV_NULL        = 5,
	BSDCONV_PASS        = 6,
};
90
91 #ifdef _BSDCONV_INTERNAL
92 struct data_st {
93 offset_t data;
94 offset_t len;
95 offset_t next;
96 };
97 #endif
98
/* Runtime datum: one node of a linked list of buffers. */
struct data_rt {
	void *data;		/* payload */
	size_t len;		/* payload length */
	struct data_rt *next;	/* next node, or NULL */
	unsigned char flags;	/* F_FREE / F_MARK bits */
};
105
106 struct state_st {
107 unsigned char status;
108 offset_t data;
109 uint16_t beg;
110 uint16_t end;
111 offset_t base;
112 };
113
114 struct state_rt {
115 unsigned char status;
116 struct data_rt *data;
117 uint16_t beg;
118 uint16_t end;
119 offset_t base;
120 };
121
122 #ifdef _BSDCONV_INTERNAL
/* One key -> pointer entry; entries are chained through next. */
struct bsdconv_hash_entry {
	char *key;
	void *ptr;
	struct bsdconv_hash_entry *next;
};
128 #endif
129
130 struct bsdconv_counter_entry {
131 char *key;
132 bsdconv_counter_t val;
133 struct bsdconv_counter_entry *next;
134 };
135
136 struct bsdconv_instance {
137 int output_mode;
138
139 struct data_rt input, output;
140
141 char flush;
142
143 struct bsdconv_phase *phase;
144 int phasen, phase_index;
145 struct bsdconv_hash_entry *hash;
146 struct bsdconv_counter_entry *counter;
147
148 bsdconv_counter_t *ierr;
149 bsdconv_counter_t *oerr;
150
151 struct data_rt *pool;
152 };
153
154 struct bsdconv_phase {
155 void *match_data;
156 struct data_rt *bak, *data_head, *data_tail, *curr;
157 struct state_rt state;
158 int index;
159 unsigned int i;
160 struct bsdconv_codec *codec;
161 int codecn;
162 offset_t offset;
163 char flags;
164 char type;
165 };
166
/*
 * Thin portability layer over the platform's dynamic loader:
 * LoadLibrary/GetProcAddress/FreeLibrary on WIN32, dlopen/dlsym/dlclose elsewhere.
 */
#ifdef WIN32
#define SHAREOBJECT HMODULE
#define OPEN_SHAREOBJECT(path) LoadLibrary(path)
#define SHAREOBJECT_SYMBOL(so, symbol) ((void *)GetProcAddress(so, symbol))
#define CLOSE_SHAREOBJECT(so) FreeLibrary(so)
#else
#define SHAREOBJECT void *
#define OPEN_SHAREOBJECT(path) dlopen(path, RTLD_LAZY)
#define SHAREOBJECT_SYMBOL(so, symbol) dlsym(so, symbol)
#define CLOSE_SHAREOBJECT(so) dlclose(so)
#endif
178
179 struct bsdconv_filter {
180 SHAREOBJECT so;
181 int (*cbfilter)(struct data_rt *);
182 };
183
184 struct bsdconv_scorer {
185 SHAREOBJECT so;
186 int (*cbscorer)(struct data_rt *);
187 };
188
189 struct bsdconv_codec {
190 #ifdef WIN32
191 HANDLE fd;
192 HANDLE md;
193 #else
194 int fd;
195 size_t maplen;
196 #endif
197 SHAREOBJECT dl;
198 char *argv;
199 char *z;
200 char *data_z;
201 char *desc;
202 void (*cbconv)(struct bsdconv_instance *);
203 void (*cbflush)(struct bsdconv_instance *);
204 int (*cbcreate)(struct bsdconv_instance *, struct bsdconv_hash_entry *arg);
205 void (*cbinit)(struct bsdconv_instance *);
206 void (*cbctl)(struct bsdconv_instance *, int, void *, size_t);
207 void (*cbdestroy)(struct bsdconv_instance *);
208 void *priv;
209 };
210
/* EDOOFUS is not defined on every platform; supply a fallback value. */
#ifndef EDOOFUS
#define EDOOFUS 88
#endif

#ifdef WIN32
/* WIN32: map the errno-style names used by the code onto Win32 error codes. */
#define EOPNOTSUPP ERROR_NOT_SUPPORTED
#define ENOMEM ERROR_NOT_ENOUGH_MEMORY
#define EINVAL ERROR_BAD_COMMAND
#define SHLIBEXT "dll"
#define REALPATH(path, buf) GetFullPathName(path, PATH_MAX+1, buf, NULL)
/* Declarations of functions this header expects to be provided on WIN32. */
char * strsep(char **, const char *);
char * index(const char *, int);
char * getwd(char *);
#else
/*
 * Elsewhere: emulate the Win32 last-error calls on top of errno (the includer
 * needs <errno.h> in scope). SetLastError is fully parenthesized so it acts as
 * one expression: SetLastError(7)+1 stores 7, not 8.
 */
#define SetLastError(n) (errno=(n))
#define GetLastError() (errno)
#define SHLIBEXT "so"
#define REALPATH(path, buf) realpath(path, buf)
#endif
230
231
232 //Internal API
233 #ifdef _BSDCONV_INTERNAL
/* Filter modules (defined outside this header) */
struct bsdconv_filter *load_filter(const char *);
void unload_filter(struct bsdconv_filter *);

/* Scorer modules (defined outside this header) */
struct bsdconv_scorer *load_scorer(const char *);
void unload_scorer(struct bsdconv_scorer *);
239
/*
 * LISTCPY_ST(INS, X, Y, Z): append runtime copies of an encoded data_st chain.
 *   INS  instance handed to DATA_MALLOC
 *   X    lvalue pointing at the current list tail; left pointing at the last
 *        node appended
 *   Y    offset (relative to Z) of the first struct data_st, as a pointer
 *        value; 0/NULL means an empty chain
 *   Z    base address that the stored offsets are relative to
 * The caller must have a pointer variable named data_ptr in scope; it is used
 * as the cursor and is NULL once the loop ends.
 * Appended nodes point into Z (the payload is not copied) and have flags=0.
 * All three data_st fields are stored encoded, so len goes through
 * de_offset() exactly like data and next.
 */
#define LISTCPY_ST(INS, X,Y,Z) for(data_ptr=(Y);data_ptr;){	\
	struct data_st listcpy_raw_;	\
	DATA_MALLOC(INS, (X)->next);	\
	(X)=(X)->next;	\
	memcpy(&listcpy_raw_, (char *)((Z)+(uintptr_t)data_ptr), sizeof(struct data_st));	\
	data_ptr=(void *)(uintptr_t)de_offset(listcpy_raw_.next);	\
	(X)->data=(char *)((Z)+(uintptr_t)de_offset(listcpy_raw_.data));	\
	(X)->len=de_offset(listcpy_raw_.len);	\
	(X)->flags=0;	\
	(X)->next=NULL;	\
}
251
/*
 * LISTCPY(INS, X, Y): append a shallow copy of every node of list Y after X.
 *   INS  instance handed to DATA_MALLOC
 *   X    lvalue pointing at the current list tail; left pointing at the last
 *        node appended
 *   Y    first node of the source list (NULL copies nothing)
 * Each copy shares the source's data pointer and len, but has flags cleared
 * (so the copy never carries F_FREE) and next reset to NULL.
 * The expansion itself ends in ';' -- kept as-is for existing call sites.
 */
#define LISTCPY(INS, X,Y) do{	\
	struct data_rt *listcpy_src_;	\
	for(listcpy_src_=(Y); listcpy_src_!=NULL; listcpy_src_=listcpy_src_->next){	\
		DATA_MALLOC(INS, (X)->next);	\
		(X)=(X)->next;	\
		*(X)=*listcpy_src_;	\
		(X)->next=NULL;	\
		(X)->flags=0;	\
	}	\
}while(0);
263
/*
 * LISTFREE(INS, X, Y, Z): release the nodes that follow X, up to but not
 * including Y (or to the end of the list when Y is never reached).
 *   INS  instance handed to DATUM_FREE
 *   X    node whose successors are released; X itself is kept
 *   Y    first node to keep
 *   Z    lvalue pointer; when it points at a node being released it is moved
 *        back to X
 * The caller must have a pointer variable named data_ptr in scope; it holds
 * the successor of the node being released while DATUM_FREE rewrites ->next.
 */
#define LISTFREE(INS, X,Y,Z) for(;(X)->next!=NULL && (X)->next!=(Y);(X)->next=data_ptr){	\
	data_ptr=(X)->next->next;	\
	DATUM_FREE(INS, (X)->next);	\
	if((X)->next==(Z)){	\
		(Z)=(X);	\
	}	\
}
272
get_offset(struct bsdconv_codec * codec,struct state_rt * state,int val)273 static inline offset_t get_offset(struct bsdconv_codec *codec, struct state_rt *state, int val){
274 offset_t offset;
275 if(val>=state->beg && val<state->end)
276 memcpy(&offset, codec->z + (uintptr_t)state->base + (val - state->beg) * sizeof(offset_t), sizeof(offset_t));
277 else
278 offset=0;
279 return offset;
280 }
281
read_state(struct bsdconv_codec * codec,uintptr_t p)282 static inline struct state_rt read_state(struct bsdconv_codec *codec, uintptr_t p){
283 struct state_st state_st;
284 struct state_rt state;
285 memcpy(&state_st, codec->z + p, sizeof(struct state_st));
286 state.status=state_st.status;
287 state.data=(void *)(uintptr_t)de_offset(state_st.data);
288 state.beg=de_uint16(state_st.beg);
289 state.end=de_uint16(state_st.end);
290 state.base=de_offset(state_st.base);
291 return state;
292 }
293
/*
 * RESET(X): rewind phase X of the instance named `ins` in the caller's scope.
 * Sets index and offset to 0 and reloads state from offset 0 of the phase's
 * first codec.
 * X is evaluated exactly once, so an argument with side effects is safe.
 */
#define RESET(X) do{	\
	struct bsdconv_phase *reset_phase_=&ins->phase[X];	\
	reset_phase_->index=0;	\
	reset_phase_->offset=0;	\
	reset_phase_->state=read_state(&reset_phase_->codec[reset_phase_->index], 0);	\
}while(0)
299
/* Cast shorthands: view a pointer as char * / unsigned char *. */
#define CP(P)  ((char *)(P))
#define UCP(P) ((unsigned char *)(P))
302
/*
 * DATUM_FREE(INS, X): release the single node X.
 * free()s X->data when F_FREE is set, then pushes X onto INS->pool for reuse
 * (the node itself is not free()d). X is expanded several times.
 * INS is parenthesized so any pointer expression (e.g. &obj) works.
 */
#define DATUM_FREE(INS, X) do{	\
	if((X)->flags & F_FREE){	\
		free((X)->data);	\
	}	\
	(X)->next=(INS)->pool;	\
	(INS)->pool=(X);	\
}while(0)
/*
 * DATA_FREE(INS, X): release the whole list starting at X (NULL is allowed).
 * For every node: free() its data when F_FREE is set, then push the node onto
 * INS->pool. X is evaluated once.
 * The internal cursors use reserved-looking names so that caller variables
 * such as `p` or `t` can be passed as X, and INS is parenthesized.
 */
#define DATA_FREE(INS, X) do{	\
	struct data_rt *data_free_cur_=(X);	\
	struct data_rt *data_free_next_;	\
	while(data_free_cur_){	\
		if(data_free_cur_->flags & F_FREE){	\
			free(data_free_cur_->data);	\
		}	\
		data_free_next_=data_free_cur_->next;	\
		data_free_cur_->next=(INS)->pool;	\
		(INS)->pool=data_free_cur_;	\
		data_free_cur_=data_free_next_;	\
	}	\
}while(0)
305
/*
 * Accessors into INS->phase[]:
 *   PREV_PHASE  element phase_index-1
 *   LAST_PHASE  element phasen
 *   THIS_PHASE  element phase_index
 *   THIS_CODEC  codec[index] of the current phase
 */
#define PREV_PHASE(INS) ((INS)->phase + ((INS)->phase_index - 1))
#define LAST_PHASE(INS) ((INS)->phase + (INS)->phasen)
#define THIS_PHASE(INS) ((INS)->phase + (INS)->phase_index)
#define THIS_CODEC(INS) (((INS)->phase + (INS)->phase_index)->codec + ((INS)->phase + (INS)->phase_index)->index)
310 #endif
311
312 //API
313 //main
314 struct bsdconv_instance *bsdconv_create(const char *);
315 void bsdconv_init(struct bsdconv_instance *);
316 void bsdconv_ctl(struct bsdconv_instance *, int, void *, int);
317 void bsdconv_destroy(struct bsdconv_instance *);
318 void bsdconv(struct bsdconv_instance *);
319 char * bsdconv_error(void);
320 char *bsdconv_pack(struct bsdconv_instance *);
321
322 //counter
323 bsdconv_counter_t * bsdconv_counter(struct bsdconv_instance *, const char *);
324 void bsdconv_counter_reset(struct bsdconv_instance *, const char *);
325
326 //hash
327 void bsdconv_hash_set(struct bsdconv_instance *, const char *, void *);
328 void * bsdconv_hash_get(struct bsdconv_instance *, const char *);
329 int bsdconv_hash_has(struct bsdconv_instance *, const char *);
330 void bsdconv_hash_del(struct bsdconv_instance *, const char *);
331
332 //module
333 int bsdconv_module_vital(int, const char *);
334 char * bsdconv_solve_alias(int, const char *);
335 int bsdconv_module_check(int, const char *);
336 int bsdconv_codec_check(int, const char *);
337 char ** bsdconv_modules_list(int);
338 char ** bsdconv_codecs_list(int);
339
340 //codec
341 int loadcodec(struct bsdconv_codec *cd, int type);
342 void unloadcodec(struct bsdconv_codec *cd);
343
344 //util
345 int bsdconv_get_phase_index(struct bsdconv_instance *, int);
346 int bsdconv_get_codec_index(struct bsdconv_instance *, int, int);
347 char * bsdconv_insert_phase(const char *, const char *, int, int);
348 char * bsdconv_insert_codec(const char *, const char *, int, int);
349 char * bsdconv_replace_phase(const char *, const char *, int, int);
350 char * bsdconv_replace_codec(const char *, const char *, int, int);
351 void *bsdconv_malloc(size_t);
352 void bsdconv_free(void *);
353 int bsdconv_mkstemp(char *);
354 int str2datum(const char *, struct data_rt *);
355 struct data_rt * str2data(const char *, int *, struct bsdconv_instance *);
356 char * getCodecDir();
357
358 //Callback function interface
359 void cbconv(struct bsdconv_instance *);
360 void cbflush(struct bsdconv_instance *);
361 int cbcreate(struct bsdconv_instance *, struct bsdconv_hash_entry *);
362 void cbinit(struct bsdconv_instance *);
363 void cbctl(struct bsdconv_instance *, int, void *, size_t);
364 void cbdestroy(struct bsdconv_instance *);
365 int cbfilter(struct data_rt *data);
366
//CTL Action
/* Action codes; the values are explicit and 1 and 2 are not assigned here. */
enum bsdconv_ctl_action {
	BSDCONV_CTL_ATTACH_SCORE       = 0,
	BSDCONV_CTL_ATTACH_OUTPUT_FILE = 3,
	BSDCONV_CTL_AMBIGUOUS_PAD      = 4
};
373
374 //Helpers
/*
 * DATA_MALLOC(INS, X): obtain one struct data_rt node into X.
 * Pops the head of INS->pool when the pool is non-empty, otherwise calls
 * malloc(). The node's contents are not initialized, and X is NULL when
 * malloc() fails.
 * INS is parenthesized so any pointer expression (e.g. &obj) works.
 */
#define DATA_MALLOC(INS, X) do{	\
	if((INS)->pool){	\
		(X)=(INS)->pool;	\
		(INS)->pool=(INS)->pool->next;	\
	}else{	\
		(X)=malloc(sizeof(struct data_rt));	\
	}	\
}while(0)
376
dup_data_rt(struct bsdconv_instance * ins,struct data_rt * data)377 static inline struct data_rt * dup_data_rt(struct bsdconv_instance *ins, struct data_rt *data){
378 struct data_rt *ret;
379 DATA_MALLOC(ins, ret);
380 *ret=*data;
381 data->flags &= ~F_FREE;
382 return ret;
383 }
384
/*
 * Upper-case a NUL-terminated string in place.
 * Only the bytes 'a'..'z' are changed; everything else is left alone and no
 * locale is consulted.
 */
static inline void strtoupper(char *s){
	while(*s!='\0'){
		if('a'<=*s && *s<='z'){
			*s=*s-'a'+'A';
		}
		++s;
	}
}
393
//Binary
/* bb<bits>: the named bit pattern written out as a hex constant. */
#define bb00000011 0x03
#define bb00000111 0x07
#define bb00001000 0x08
#define bb00001111 0x0F
#define bb00011100 0x1C
#define bb00110000 0x30
#define bb00111100 0x3C
#define bb00111111 0x3F
#define bb01000000 0x40
#define bb10000000 0x80
#define bb11000000 0xC0
#define bb11011000 0xD8
#define bb11011100 0xDC
#define bb11100000 0xE0
#define bb11110000 0xF0
#define bb11111000 0xF8
#define bb11111100 0xFC
#define bb11111110 0xFE
/* wider than one byte */
#define bb1111111111 0x3FF
#define bb11111111110000000000 0xFFC00
415
416 #ifdef USE_OCT_MAP
/* Octal digit value per byte: '0'..'7' -> 0..7, every other byte -> -1. */
int oct[256]={
	/* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x30 */  0, 1, 2, 3,  4, 5, 6, 7, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x40 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x60 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x80 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x90 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xa0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xb0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xc0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xd0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xe0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xf0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
418 #endif
419 #ifdef USE_DEC_MAP
/* Decimal digit value per byte: '0'..'9' -> 0..9, every other byte -> -1. */
int dec[256]={
	/* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x30 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	/* 0x40 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x60 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x80 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x90 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xa0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xb0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xc0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xd0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xe0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xf0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
421 #endif
422 #ifdef USE_HEX_MAP
/*
 * Hex digit value per byte: '0'..'9' -> 0..9, 'A'..'F' and 'a'..'f' -> 10..15,
 * every other byte -> -1.
 */
int hex[256]={
	/* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x30 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	/* 0x40 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x60 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x80 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0x90 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xa0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xb0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xc0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xd0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xe0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	/* 0xf0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
424 #endif
425
/*
 * A 32-bit value that can also be addressed as its four bytes.
 * Which byte[] element holds which part of ucs4 depends on host byte order.
 */
typedef union {
	unsigned char byte[4];
	uint32_t ucs4;
} ucs_t;
430
// modules/filter/unicode_range.c
/* A range of 32-bit values given by its two end points. */
struct uint32_range {
	uint32_t first;
	uint32_t last;
};
436
// modules/score/unicode_range.c
/* A range of 32-bit values (two end points) together with a score. */
struct uint32_range_with_score {
	uint32_t first;
	uint32_t last;
	uint32_t score;
};
443
444
445 #ifdef __cplusplus
446 }
447 #endif
448
449 #endif
450