1 #define USE_HEX_MAP
2 
3 #include <unistd.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <sys/param.h>
7 #include <sys/types.h>
8 #include <stdint.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <dirent.h>
12 #include "bsdconv.h"
13 #ifdef WIN32
14 #include <windows.h>
15 #else
16 #include <fcntl.h>
17 #include <errno.h>
18 #include <sys/stat.h>
19 #include <sys/mman.h>
20 #endif
21 
22 #ifndef MAP_PREFAULT_READ
23 #define MAP_PREFAULT_READ 0
24 #endif
25 
26 #ifdef WIN32
27 #define MODULES_SUBPATH "modules"
28 #else
29 #define MODULES_SUBPATH "share/bsdconv"
30 #endif
31 
32 struct bsdconv_instance *bsdconv_unpack(const char *);
33 char *bsdconv_pack(struct bsdconv_instance *);
34 
35 #include "libbsdconv_counter.c"
36 #include "libbsdconv_filter.c"
37 #include "libbsdconv_scorer.c"
38 #include "libbsdconv_hash.c"
39 #include "libbsdconv_module.c"
40 #include "libbsdconv_util.c"
41 
_cbcreate(struct bsdconv_instance * ins,int p,int c)42 static inline int _cbcreate(struct bsdconv_instance *ins, int p, int c){
43 	int r;
44 	char *argv;
45 	if(ins->phase[p].codec[c].argv)
46 		argv=strdup(ins->phase[p].codec[c].argv);
47 	else
48 		argv=strdup("");
49 	char *cur=argv;
50 	char *k;
51 	struct bsdconv_hash_entry *arg=NULL, *tmp;
52 	struct bsdconv_hash_entry **last=&arg;
53 	if(*cur){
54 		while((k=strsep(&cur, "&"))!=NULL){
55 			*last=malloc(sizeof(struct bsdconv_hash_entry));
56 			(*last)->key=k;
57 			(*last)->ptr=strchr(k, '=');
58 			if((*last)->ptr){
59 				*CP((*last)->ptr)=0;
60 				(*last)->ptr+=1;
61 			}
62 			(*last)->next=NULL;
63 			last=&((*last)->next);
64 		}
65 	}
66 	r=ins->phase[p].codec[c].cbcreate(ins, arg);
67 	free(argv);
68 	while(arg){
69 		tmp=arg->next;
70 		free(arg);
71 		arg=tmp;
72 	}
73 	return r;
74 }
75 
_loadcodec(struct bsdconv_codec * cd,char * path)76 int _loadcodec(struct bsdconv_codec *cd, char *path){
77 #ifdef WIN32
78 	if ((cd->fd=CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL))==INVALID_HANDLE_VALUE){
79 		SetLastError(EOPNOTSUPP);
80 		return 0;
81 	}
82 	cd->md=CreateFileMapping(cd->fd, NULL, PAGE_READONLY, 0,0, NULL);
83 	if(!cd->md){
84 		CloseHandle(cd->fd);
85 		SetLastError(ENOMEM);
86 		return 0;
87 	}
88 	cd->data_z=cd->z=MapViewOfFile(cd->md, FILE_MAP_READ, 0,0,0);
89 	if(!cd->z){
90 		CloseHandle(cd->md);
91 		CloseHandle(cd->fd);
92 		SetLastError(ENOMEM);
93 		return 0;
94 	}
95 #else
96 	struct stat stat;
97 	if((cd->fd=open(path, O_RDONLY))==-1){
98 		SetLastError(EOPNOTSUPP);
99 		return 0;
100 	}
101 	fstat(cd->fd, &stat);
102 	cd->maplen=stat.st_size;
103 	if((cd->data_z=cd->z=mmap(0, stat.st_size, PROT_READ, MAP_PRIVATE | MAP_PREFAULT_READ, cd->fd, 0))==MAP_FAILED){
104 		close(cd->fd);
105 		SetLastError(ENOMEM);
106 		return 0;
107 	}
108 #endif
109 
110 	cd->dl=NULL;
111 	cd->cbcreate=NULL;
112 	cd->cbinit=NULL;
113 	cd->cbctl=NULL;
114 	cd->cbconv=NULL;
115 	cd->cbflush=NULL;
116 	cd->cbdestroy=NULL;
117 	strcat(path, "." SHLIBEXT);
118 
119 	if((cd->dl=OPEN_SHAREOBJECT(path))){
120 		cd->cbconv=SHAREOBJECT_SYMBOL(cd->dl,"cbconv");
121 		cd->cbflush=SHAREOBJECT_SYMBOL(cd->dl,"cbflush");
122 		cd->cbcreate=SHAREOBJECT_SYMBOL(cd->dl,"cbcreate");
123 		cd->cbinit=SHAREOBJECT_SYMBOL(cd->dl,"cbinit");
124 		cd->cbctl=SHAREOBJECT_SYMBOL(cd->dl,"cbctl");
125 		cd->cbdestroy=SHAREOBJECT_SYMBOL(cd->dl,"cbdestroy");
126 	}
127 
128 	return 1;
129 }
130 
loadcodec(struct bsdconv_codec * cd,int type)131 int loadcodec(struct bsdconv_codec *cd, int type){
132 	char *cwd;
133 	char *c;
134 	char buf[PATH_MAX+1];
135 	char *upper;
136 	cwd=getcwd(NULL, 0);
137 	if((c=getenv("BSDCONV_PATH"))){
138 		chdir(c);
139 	}else{
140 		chdir(BSDCONV_PATH);
141 	}
142 	chdir(MODULES_SUBPATH);
143 	switch(type){
144 		case FROM:
145 			chdir("from");
146 			break;
147 		case INTER:
148 			chdir("inter");
149 			break;
150 		case TO:
151 			chdir("to");
152 			break;
153 	}
154 	upper=strdup(cd->desc);
155 	strtoupper(upper);
156 	REALPATH(upper, buf);
157 	chdir(cwd);
158 	free(cwd);
159 	free(upper);
160 	if(!_loadcodec(cd, buf))
161 		return 0;
162 	return 1;
163 }
164 
unloadcodec(struct bsdconv_codec * cd)165 void unloadcodec(struct bsdconv_codec *cd){
166 	if(cd->dl){
167 		CLOSE_SHAREOBJECT(cd->dl);
168 	}
169 #ifdef WIN32
170 	UnmapViewOfFile(cd->z);
171 	CloseHandle(cd->md);
172 	CloseHandle(cd->fd);
173 #else
174 	munmap(cd->z, cd->maplen);
175 	close(cd->fd);
176 #endif
177 }
178 
bsdconv_init(struct bsdconv_instance * ins)179 void bsdconv_init(struct bsdconv_instance *ins){
180 	int i, j;
181 	struct data_rt *data_ptr;
182 
183 	ins->flush=0;
184 	ins->input.data=NULL;
185 	ins->input.flags=0;
186 	ins->input.len=0;
187 	ins->output.data=NULL;
188 	ins->output.len=0;
189 	ins->output_mode=BSDCONV_HOLD;
190 
191 	for(i=0;i<=ins->phasen;++i){
192 		ins->phase_index=i;
193 		ins->phase[i].flags=0;
194 		ins->phase[i].offset=0;
195 		while(ins->phase[i].data_head->next){
196 			data_ptr=ins->phase[i].data_head->next;
197 			ins->phase[i].data_head->next=ins->phase[i].data_head->next->next;
198 			if(data_ptr->flags & F_FREE)
199 				free(data_ptr->data);
200 			free(data_ptr);
201 		}
202 		ins->phase[i].data_tail=ins->phase[i].data_head;
203 		ins->phase[i].data_head->len=0;
204 		ins->phase[i].match_data=NULL;
205 		if(i>0){
206 			ins->phase[i].curr=ins->phase[i-1].data_head;
207 			for(j=0;j<=ins->phase[i].codecn;++j){
208 				ins->phase[i].index=j;
209 				if(ins->phase[i].codec[j].cbinit)
210 					ins->phase[i].codec[j].cbinit(ins);
211 			}
212 			RESET(i);
213 		}
214 	}
215 }
216 
bsdconv_ctl(struct bsdconv_instance * ins,int ctl,void * p,int v)217 void bsdconv_ctl(struct bsdconv_instance *ins, int ctl, void *p, int v){
218 	int i, j;
219 	for(i=1;i<=ins->phasen;++i){
220 		for(j=0;j<=ins->phase[i].codecn;++j){
221 			if(ins->phase[i].codec[j].cbctl){
222 				ins->phase_index=i;
223 				ins->phase[i].index=j;
224 				ins->phase[i].codec[j].cbctl(ins, ctl, p, v);
225 			}
226 		}
227 	}
228 }
229 
bsdconv_pack(struct bsdconv_instance * ins)230 char *bsdconv_pack(struct bsdconv_instance *ins){
231 	char *ret;
232 	char *t, *cur;
233 	const char *end;
234 	int len=0;
235 	int i, j, n;
236 	for(i=1;i<=ins->phasen;++i){
237 		for(j=0;j<=ins->phase[i].codecn;++j){
238 			len+=strlen(ins->phase[i].codec[j].desc);
239 			t=ins->phase[i].codec[j].desc;
240 			n=1;
241 			while(*t){
242 				if(*t==',')
243 					n+=1;
244 				t++;
245 			}
246 			if(ins->phase[i].codec[j].argv)
247 				len+=(strlen(ins->phase[i].codec[j].argv)+1)*n;
248 			len+=1;
249 		}
250 	}
251 	ret=malloc(sizeof(char) * len);
252 	ret[0]=0;
253 	for(i=1;i<=ins->phasen;++i){
254 		for(j=0;j<=ins->phase[i].codecn;++j){
255 			if(j==0){
256 				if(i>1){
257 					switch(ins->phase[i].type){
258 						case FROM:
259 							strcat(ret, "|");
260 							break;
261 						case INTER:
262 						case TO:
263 							strcat(ret, ":");
264 						break;
265 					}
266 				}
267 			}else{
268 				strcat(ret, ",");
269 			}
270 			t=ins->phase[i].codec[j].desc;
271 			while(1){
272 				cur=strchr(t, ',');
273 				if(cur){
274 					end=",";
275 					*cur=0;
276 				}else{
277 					end="";
278 				}
279 				strcat(ret, t);
280 				if(ins->phase[i].codec[j].argv && *(ins->phase[i].codec[j].argv)){
281 					if(strchr(t,'#')==NULL)
282 						strcat(ret, "#");
283 					else
284 						strcat(ret, "&");
285 					strcat(ret, ins->phase[i].codec[j].argv);
286 				}
287 				strcat(ret, end);
288 				if(cur)
289 					t=cur+1;
290 				else
291 					break;
292 			}
293 		}
294 	}
295 	return ret;
296 }
297 
bsdconv_unpack(const char * _conversion)298 struct bsdconv_instance *bsdconv_unpack(const char *_conversion){
299 	struct bsdconv_instance *ins=malloc(sizeof(struct bsdconv_instance));
300 	char *conversion;
301 	char *t, *t1;
302 	int i, j;
303 	int f=0;
304 
305 	ins->pool=NULL;
306 	ins->hash=NULL;
307 	ins->counter=NULL;
308 	ins->input.flags=0;
309 	ins->output.flags=0;
310 
311 	conversion=strdup(_conversion);
312 	t1=t=conversion;
313 	i=1;
314 	for(t=(char *)conversion;*t;t++){
315 		if(*t==':' || *t=='|')++i;
316 	}
317 	ins->phasen=i;
318 	char *phase_off[i+1];
319 
320 	ins->phase=malloc(sizeof(struct bsdconv_phase) * (i+1));
321 
322 	i=1;
323 	t1=t=conversion;
324 	while((t1=strsep(&t, "|")) != NULL){
325 		if(f>1){
326 			ins->phase[i-f].type=FROM;
327 			ins->phase[i-1].type=TO;
328 		}
329 		f=0;
330 		while((phase_off[i]=strsep(&t1, ":"))!=NULL){
331 			ins->phase[i].type=INTER;
332 			i+=1;
333 			f+=1;
334 		}
335 	}
336 	if(f>1){
337 		ins->phase[i-f].type=FROM;
338 		ins->phase[i-1].type=TO;
339 	}
340 	ins->phase[0].type=_INPUT;
341 
342 	for(i=1;i<=ins->phasen;++i){
343 		if(*phase_off[i]){
344 			ins->phase[i].codecn = 0;
345 			for(t=(char *)phase_off[i];*t;t++){
346 				if(*t==','){
347 					ins->phase[i].codecn+=1;
348 				}
349 			}
350 		}else{ // empty phase
351 			free(ins->phase);
352 			free(ins);
353 			free(conversion);
354 			return NULL;
355 		}
356 	}
357 	for(i=1;i<=ins->phasen;++i){
358 		ins->phase[i].codec=malloc((ins->phase[i].codecn + 1)* sizeof(struct bsdconv_codec));
359 	}
360 	for(i=1;i<=ins->phasen;++i){
361 		t=phase_off[i];
362 		for(j=0;j<=ins->phase[i].codecn;++j){
363 			ins->phase[i].codec[j].desc=strdup(strsep(&t, ","));
364 			ins->phase[i].codec[j].argv=strchr(ins->phase[i].codec[j].desc, '#');
365 			if(ins->phase[i].codec[j].argv){
366 				*(ins->phase[i].codec[j].argv)=0;
367 				ins->phase[i].codec[j].argv+=1;
368 			}
369 			if(ins->phase[i].codec[j].desc[0]==0){
370 				for(;j>=0;--j){
371 					free(ins->phase[i].codec[j].desc);
372 				}
373 				for(i=1;i<=ins->phasen;++i){
374 					free(ins->phase[i].codec);
375 				}
376 				free(ins->phase);
377 				free(ins);
378 				free(conversion);
379 				return NULL;
380 			}
381 		}
382 	}
383 	free(conversion);
384 	return ins;
385 }
386 
bsdconv_create(const char * _conversion)387 struct bsdconv_instance *bsdconv_create(const char *_conversion){
388 	int e=0;
389 	struct bsdconv_instance *ins=NULL;
390 	char *conversion=malloc(strlen(_conversion)+1);
391 	int i, j;
392 	char *c;
393 	const char *d;
394 	char whitespace[256]={0};
395 	whitespace['\r']=1;
396 	whitespace['\n']=1;
397 	whitespace['\t']=1;
398 	whitespace['\f']=1;
399 	whitespace[' ']=1;
400 	d=_conversion;
401 	c=conversion;
402 	while(*d){
403 		if(whitespace[*UCP(d)]==0){
404 			*c=*d;
405 			c+=1;
406 		}
407 		d+=1;
408 	}
409 	*c=0;
410 
411 	i=0;
412 	while(i==0 || i<=ins->phasen){
413 		start_parse:
414 		ins=bsdconv_unpack(conversion);
415 		if(ins==NULL){
416 			free(conversion);
417 			SetLastError(EINVAL);
418 			return NULL;
419 		}
420 		for(i=1;i<=ins->phasen;++i){
421 			for(j=0;j<=ins->phase[i].codecn;++j){
422 				if(!bsdconv_module_check(ins->phase[i].type, ins->phase[i].codec[j].desc)){
423 					if(bsdconv_module_vital(ins->phase[i].type, ins->phase[i].codec[j].desc)){
424 						printf("error %s\n", ins->phase[i].codec[j].desc);
425 						free(conversion);
426 						bsdconv_destroy(ins);
427 						SetLastError(EDOOFUS);
428 						return NULL;
429 					}
430 					c=bsdconv_solve_alias(ins->phase[i].type, ins->phase[i].codec[j].desc);
431 					if(c==NULL){
432 						e=1;
433 					}else{
434 						if(strcmp(c, ins->phase[i].codec[j].desc)==0)
435 							e=1;
436 						free(ins->phase[i].codec[j].desc);
437 						ins->phase[i].codec[j].desc=c;
438 					}
439 					free(conversion);
440 					conversion=bsdconv_pack(ins);
441 					for(i=1;i<=ins->phasen;++i){
442 						for(j=0;j<=ins->phase[i].codecn;++j){
443 							free(ins->phase[i].codec[j].desc);
444 						}
445 						free(ins->phase[i].codec);
446 					}
447 					free(ins->phase);
448 					free(ins);
449 					if(e){
450 						SetLastError(EOPNOTSUPP);
451 						free(conversion);
452 						return NULL;
453 					}
454 					goto start_parse;
455 				}
456 			}
457 		}
458 	}
459 	for(i=1;i<=ins->phasen;++i){
460 		for(j=0;j<=ins->phase[i].codecn;++j){
461 			if(!loadcodec(&ins->phase[i].codec[j], ins->phase[i].type)){
462 				free(ins->phase[i].codec[j].desc);
463 				j-=1;
464 				for(;i>=1;j=ins->phase[--i].codecn){
465 					for(;j>=0;--j){
466 						free(ins->phase[i].codec[j].desc);
467 						unloadcodec(&ins->phase[i].codec[j]);
468 					}
469 				}
470 				goto bsdconv_create_error;
471 			}
472 		}
473 	}
474 
475 	ins->ierr=bsdconv_counter(ins, "IERR");
476 	ins->oerr=bsdconv_counter(ins, "OERR");
477 
478 	for(i=1;i<=ins->phasen;++i){
479 		for(j=0;j<=ins->phase[i].codecn;++j){
480 			if(ins->phase[i].codec[j].cbcreate){
481 				ins->phase_index=i;
482 				ins->phase[i].index=j;
483 				e=_cbcreate(ins, i, j);
484 				if(e){
485 					for(j=j-1;j>=0;j-=1){
486 						if(ins->phase[i].codec[j].cbdestroy){
487 							ins->phase_index=i;
488 							ins->phase[i].index=j;
489 							ins->phase[i].codec[j].cbdestroy(ins);
490 						}
491 					}
492 					for(i=i-1;i>=1;i-=1){
493 						for(j=0;j<=ins->phase[i].codecn;++j){
494 							if(ins->phase[i].codec[j].cbdestroy){
495 								ins->phase_index=i;
496 								ins->phase[i].index=j;
497 								ins->phase[i].codec[j].cbdestroy(ins);
498 							}
499 						}
500 					}
501 					for(i=1;i<=ins->phasen;++i){
502 						for(j=0;j<=ins->phase[i].codecn;++j){
503 							free(ins->phase[i].codec[j].desc);
504 							unloadcodec(&ins->phase[i].codec[j]);
505 						}
506 					}
507 					SetLastError(e);
508 					goto bsdconv_create_error;
509 				}
510 			}
511 		}
512 	}
513 	for(i=0;i<=ins->phasen;++i){
514 		ins->phase[i].data_head=malloc(sizeof(struct data_rt));
515 		ins->phase[i].data_head->next=NULL;
516 		ins->phase[i].data_head->flags=0;
517 	}
518 
519 	free(conversion);
520 	return ins;
521 
522 bsdconv_create_error:
523 	for(i=1;i<=ins->phasen;++i){
524 		free(ins->phase[i].codec);
525 	}
526 
527 	free(conversion);
528 	free(ins->phase);
529 
530 	void *p;
531 	while(ins->hash){
532 		free(ins->hash->key);
533 		p=ins->hash->next;
534 		free(ins->hash);
535 		ins->hash=p;
536 	}
537 	while(ins->counter){
538 		free(ins->counter->key);
539 		p=ins->counter->next;
540 		free(ins->counter);
541 		ins->counter=p;
542 	}
543 	free(ins);
544 	return NULL;
545 }
546 
bsdconv_destroy(struct bsdconv_instance * ins)547 void bsdconv_destroy(struct bsdconv_instance *ins){
548 	int i, j;
549 	struct data_rt *data_ptr;
550 	void *p;
551 
552 	for(i=0;i<=ins->phasen;++i){
553 		if(i>0){
554 			for(j=0;j<=ins->phase[i].codecn;++j){
555 				free(ins->phase[i].codec[j].desc);
556 				if(ins->phase[i].codec[j].cbdestroy){
557 					ins->phase_index=i;
558 					ins->phase[i].index=j;
559 					ins->phase[i].codec[j].cbdestroy(ins);
560 				}
561 				unloadcodec(&ins->phase[i].codec[j]);
562 			}
563 			free(ins->phase[i].codec);
564 		}
565 		while(ins->phase[i].data_head){
566 			data_ptr=ins->phase[i].data_head;
567 			ins->phase[i].data_head=ins->phase[i].data_head->next;
568 			if(data_ptr->flags & F_FREE)
569 				free(data_ptr->data);
570 			free(data_ptr);
571 		}
572 	}
573 	while(ins->pool){
574 		data_ptr=ins->pool;
575 		ins->pool=ins->pool->next;
576 		free(data_ptr);
577 	}
578 	free(ins->phase);
579 	while(ins->hash){
580 		free(ins->hash->key);
581 		p=ins->hash->next;
582 		free(ins->hash);
583 		ins->hash=p;
584 	}
585 	while(ins->counter){
586 		free(ins->counter->key);
587 		p=ins->counter->next;
588 		free(ins->counter);
589 		ins->counter=p;
590 	}
591 	free(ins);
592 }
593 
/*
 * Run the conversion pipeline once.
 *
 * Any pending ins->input is appended to phase 0's data list, then each
 * phase (FROM / INTER / TO) is driven as a state machine over the data list
 * of the phase before it, appending its results to its own list. When no
 * phase has unread data left (and, if ins->flush is set, pending matches
 * have been forced out), the last phase's list is emitted according to
 * ins->output_mode.
 *
 * Control flow relies heavily on goto: `phase_begin` re-enters the phase
 * selected by ins->phase_index, and the *_deadend labels are also jumped to
 * from the flush code at the bottom.
 */
void bsdconv(struct bsdconv_instance *ins){
	struct bsdconv_instance *inso;
	uintptr_t i;
	struct data_rt *data_ptr;
	char *ptr;
	FILE *fp;
	int fd;
	unsigned char c;
	struct bsdconv_phase *prev_phase;
	struct bsdconv_phase *this_phase;
	struct bsdconv_codec *this_codec;

	/* hand the caller's input over to phase 0 and clear the input slot */
	if(ins->input.data!=NULL){
		DATA_MALLOC(ins, ins->phase[0].data_tail->next);
		ins->phase[0].data_tail=ins->phase[0].data_tail->next;
		*(ins->phase[0].data_tail)=ins->input;
		ins->input.data=NULL;
		ins->input.len=0;
		ins->input.flags=0;
	}

	ins->phase_index=1;

	phase_begin:
	if(ins->phase_index>0 && ins->phase_index<=ins->phasen){
		prev_phase=PREV_PHASE(ins);
		this_phase=THIS_PHASE(ins);
		this_codec=THIS_CODEC(ins);
		switch(this_phase->type){
			/* FROM: consumes the previous phase's data one byte at a time */
			case FROM:
				while(this_phase->curr->next){
					/* this phase's data_head->len holds the resume offset into the first unread datum */
					if(this_phase->curr == prev_phase->data_head) this_phase->i=this_phase->data_head->len;
					else this_phase->i=0;
					this_phase->curr=this_phase->curr->next;
					while(this_phase->i<this_phase->curr->len){
						c=UCP(this_phase->curr->data)[this_phase->i];
						offset_t next = get_offset(this_codec, &this_phase->state, c);
						if(next){
							this_phase->offset = next;
						}else if(!(this_phase->flags & F_LOOPBACK)){
							this_phase->offset=0;
						}
						this_phase->state=read_state(this_codec, this_phase->offset);
						from_x:
						switch(this_phase->state.status){
							case DEADEND:
								from_deadend:
								this_phase->flags &= ~(F_PENDING | F_LOOPBACK);
								if(this_phase->flags & F_MATCH){
									/* fall back to the last remembered SUBMATCH */
									if(this_phase->match_data){
										LISTCPY_ST(ins, this_phase->data_tail, this_phase->match_data, this_codec->data_z);

										LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
										this_phase->curr=prev_phase->data_head;
										this_phase->i=this_phase->data_head->len;
									}else if(this_codec->cbflush){
										this_codec->cbflush(ins);
									}
									this_phase->flags &= ~F_MATCH;
									RESET(ins->phase_index);
									goto phase_begin;
								}else if(this_phase->index < this_phase->codecn){
									/* no match with this codec: retry from the start with the next codec of the phase */
									this_phase->index++;
									this_codec=THIS_CODEC(ins);

									this_phase->state=read_state(this_codec, 0);

									this_phase->curr=prev_phase->data_head;
									this_phase->i=this_phase->data_head->len;
									continue;
								}else{
									/* no codec of the phase matched: count an input error and advance the resume offset by one */
									*(ins->ierr)+=1;

									RESET(ins->phase_index);
									this_codec=THIS_CODEC(ins);

									this_phase->bak=this_phase->curr;
									LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
									this_phase->bak=this_phase->curr=prev_phase->data_head;
									this_phase->i=this_phase->data_head->len=this_phase->data_head->len+1;
									continue;
								}
								break;
							case MATCH:
								this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
								this_phase->match_data=NULL;

								/* emit the matched state's data, then let the next phase run */
								LISTCPY_ST(ins, this_phase->data_tail, this_phase->state.data, this_codec->data_z);

								this_phase->bak=this_phase->curr;
								LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
								this_phase->curr=prev_phase->data_head;
								this_phase->data_head->len=this_phase->i+1;

								RESET(ins->phase_index);

								ins->phase_index+=1;
								goto phase_begin;
							case SUBMATCH:
								/* remember this match and keep reading for a longer one */
								this_phase->flags |= (F_MATCH | F_PENDING);
								this_phase->match_data=this_phase->state.data;

								this_phase->bak=this_phase->curr;
								this_phase->data_head->len=this_phase->i+1;
								break;
							case SUBROUTINE:
							case SUBMATCH_SUBROUTINE:
								/* the codec callback decides the next state; re-dispatch on it */
								this_codec->cbconv(ins);
								this_phase->flags |= F_LOOPBACK;

								goto from_x;
							case NEXTPHASE:
								this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
								this_phase->match_data=NULL;

								this_phase->bak=this_phase->curr;
								LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
								this_phase->curr=prev_phase->data_head;
								this_phase->data_head->len=this_phase->i+1;

								RESET(ins->phase_index);

								ins->phase_index+=1;
								goto phase_begin;
							case CONTINUE:
								this_phase->flags |= F_PENDING;
								break;
							case NOOP:
								goto phase_begin;
						}
						this_phase->i+=1;
					}
				}
			break;

		/* INTER: consumes the previous phase's data one whole datum at a time */
		case INTER:
			while(this_phase->curr->next){
				this_phase->curr=this_phase->curr->next;
				this_phase->state.status=NOMATCH;
				for(this_phase->i=0;this_phase->i<this_phase->curr->len;this_phase->i+=1){
					c=UCP(this_phase->curr->data)[this_phase->i];
					offset_t next = get_offset(this_codec, &this_phase->state, c);
					if(next){
						this_phase->offset = next;
					}else if(!(this_phase->flags & F_LOOPBACK)){
						this_phase->offset=0;
					}
					this_phase->state=read_state(this_codec, this_phase->offset);
					switch(this_phase->state.status){
						case DEADEND:
							goto inter_deadend;
							break;
						case SUBROUTINE:
						case SUBMATCH_SUBROUTINE:
							this_phase->flags |= F_LOOPBACK;
							break;
					}
				}
				inter_x:
				switch(this_phase->state.status){
					case NOMATCH:
						ins->phase_index+=1;
						goto phase_begin;
					case DEADEND:
						inter_deadend:
						this_phase->flags &= ~(F_PENDING | F_LOOPBACK);
						if(this_phase->flags & F_MATCH){
							if(this_phase->match_data){
								LISTCPY_ST(ins, this_phase->data_tail, this_phase->match_data, this_codec->data_z);

								LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
								this_phase->curr=prev_phase->data_head;
							}else if(this_codec->cbflush){
								this_codec->cbflush(ins);
							}

							this_phase->flags &= ~F_MATCH;
							RESET(ins->phase_index);
							goto phase_begin;
						}else if(this_phase->index < this_phase->codecn){
							this_phase->index++;
							this_codec=THIS_CODEC(ins);

							this_phase->state=read_state(this_codec, 0);

							this_phase->curr=prev_phase->data_head;
							continue;
						}else{
							/* nothing matched: move the first datum of the previous phase to this phase's list unchanged */
							data_ptr=prev_phase->data_head->next;
							prev_phase->data_head->next=prev_phase->data_head->next->next;
							this_phase->curr=prev_phase->data_head;
							data_ptr->next=NULL;
							this_phase->data_tail->next=data_ptr;
							this_phase->data_tail=data_ptr;
							if(prev_phase->data_tail==data_ptr){
								prev_phase->data_tail=prev_phase->data_head;
							}

							RESET(ins->phase_index);

							ins->phase_index+=1;
							goto phase_begin;
						}
						break;
					case MATCH:
						this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
						this_phase->match_data=NULL;

						LISTCPY_ST(ins, this_phase->data_tail, this_phase->state.data, this_codec->data_z);

						this_phase->bak=this_phase->curr->next;
						LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
						this_phase->curr=prev_phase->data_head;

						RESET(ins->phase_index);

						ins->phase_index+=1;
						goto phase_begin;
					case SUBMATCH:
						this_phase->flags |= (F_MATCH | F_PENDING);
						this_phase->match_data=this_phase->state.data;

						if(this_phase->curr->next){
							this_phase->bak=this_phase->curr->next;
						}else{
							/* no following datum yet: append an empty one to the previous phase so `bak` has a node to point at */
							DATA_MALLOC(ins, prev_phase->data_tail->next);
							this_phase->bak=prev_phase->data_tail->next;
							prev_phase->data_tail=prev_phase->data_tail->next;
							prev_phase->data_tail->next=NULL;
							prev_phase->data_tail->len=0;
							prev_phase->data_tail->flags=0;
						}

						break;
					case SUBROUTINE:
					case SUBMATCH_SUBROUTINE:
						this_codec->cbconv(ins);
						goto inter_x;
					case NEXTPHASE:
						this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
						this_phase->match_data=NULL;

						this_phase->bak=this_phase->curr->next;
						LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
						this_phase->curr=prev_phase->data_head;

						RESET(ins->phase_index);

						ins->phase_index+=1;
						goto phase_begin;
					case CONTINUE:
						this_phase->flags |= F_PENDING;
						break;
					case NOOP:
						goto phase_begin;

				}
				/* 256 lies outside the byte range; presumably the table's datum-boundary transition (TODO confirm in get_offset) */
				offset_t next = get_offset(this_codec, &this_phase->state, 256);
				if(next){
					this_phase->offset = next;
				}else if(!(this_phase->flags & F_LOOPBACK)){
					this_phase->offset=0;
				}
				this_phase->state=read_state(this_codec, this_phase->offset);
				if(this_phase->state.status==DEADEND){ goto inter_deadend;}
			}
			break;

		/* TO: same datum-at-a-time walk as INTER, but failures count as output errors */
		case TO:
			while(this_phase->curr->next){
				this_phase->curr=this_phase->curr->next;
				this_phase->state.status=NOMATCH;
				for(this_phase->i=0;this_phase->i<this_phase->curr->len;this_phase->i+=1){
					c=UCP(this_phase->curr->data)[this_phase->i];
					offset_t next = get_offset(this_codec, &this_phase->state, c);
					if(next){
						this_phase->offset = next;
					}else if(!(this_phase->flags & F_LOOPBACK)){
						this_phase->offset=0;
					}
					this_phase->state=read_state(this_codec, this_phase->offset);
					switch(this_phase->state.status){
						case DEADEND:
							goto to_deadend;
							break;
						case SUBROUTINE:
						case SUBMATCH_SUBROUTINE:
							this_phase->flags |= F_LOOPBACK;
							break;
					}
				}
				to_x:
				switch(this_phase->state.status){
					case NOMATCH:
						continue;
					case DEADEND:
						to_deadend:
						this_phase->flags &= ~(F_PENDING | F_LOOPBACK);
						if(this_phase->flags & F_MATCH){
							if(this_phase->match_data){
								LISTCPY_ST(ins, this_phase->data_tail, this_phase->match_data, this_codec->data_z);

								LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
								this_phase->curr=prev_phase->data_head;
							}else if(this_codec->cbflush){
								this_codec->cbflush(ins);
							}

							this_phase->flags &= ~F_MATCH;
							RESET(ins->phase_index);
							ins->phase_index+=1;
							goto phase_begin;
						}else if(this_phase->index < this_phase->codecn){
							this_phase->index++;
							this_codec=THIS_CODEC(ins);

							this_phase->state=read_state(this_codec, 0);

							this_phase->curr=prev_phase->data_head;
							continue;
						}else{
							/* no codec of the phase matched: count an output error and drop the datum */
							*(ins->oerr)+=1;

							RESET(ins->phase_index);
							this_codec=THIS_CODEC(ins);

							this_phase->bak=this_phase->curr->next;
							LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
							this_phase->bak=this_phase->curr=prev_phase->data_head;

							continue;
						}
						break;
					case MATCH:
						this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
						this_phase->match_data=NULL;

						LISTCPY_ST(ins, this_phase->data_tail, this_phase->state.data, this_codec->data_z);

						this_phase->bak=this_phase->curr->next;
						LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
						this_phase->curr=prev_phase->data_head;

						RESET(ins->phase_index);
						ins->phase_index+=1;
						goto phase_begin;
					case SUBMATCH:
						this_phase->flags |= (F_MATCH | F_PENDING);
						this_phase->match_data=this_phase->state.data;
						if(this_phase->curr->next){
							this_phase->bak=this_phase->curr->next;
						}else{
							DATA_MALLOC(ins, prev_phase->data_tail->next);
							this_phase->bak=prev_phase->data_tail->next;
							prev_phase->data_tail=prev_phase->data_tail->next;
							prev_phase->data_tail->next=NULL;
							prev_phase->data_tail->len=0;
							prev_phase->data_tail->flags=0;
						}

						break;
					case SUBROUTINE:
					case SUBMATCH_SUBROUTINE:
						this_codec->cbconv(ins);
						goto to_x;
					case NEXTPHASE:
						this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
						this_phase->match_data=NULL;

						this_phase->bak=this_phase->curr->next;
						LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
						this_phase->curr=prev_phase->data_head;

						RESET(ins->phase_index);

						ins->phase_index+=1;
						goto phase_begin;
					case CONTINUE:
						this_phase->flags|=F_PENDING;
						this_phase->flags &= ~F_LOOPBACK;
						break;
					case NOOP:
						goto phase_begin;
				}
				offset_t next = get_offset(this_codec, &this_phase->state, 256);
				if(next){
					this_phase->offset = next;
				}else if(!(this_phase->flags & F_LOOPBACK)){
					this_phase->offset=0;
				}
				this_phase->state=read_state(this_codec, this_phase->offset);
				if(this_phase->state.status==DEADEND){ goto to_deadend;}
			}
			break;
		}
		ins->phase_index+=1;
	}

	// check back (phase loop): starting from the last phase, re-enter the
	// first phase found that still has unread data in its predecessor
	for(ins->phase_index=ins->phasen;ins->phase_index>0;ins->phase_index-=1){
		if(ins->phase[ins->phase_index].curr->next){
			goto phase_begin;
		}
	}

	// flush: when requested, force every phase that is still waiting for
	// more input (F_PENDING) through its dead-end handling
	if(ins->flush){
		for(ins->phase_index=1;ins->phase_index<=ins->phasen;++(ins->phase_index)){
			if(THIS_PHASE(ins)->flags & F_PENDING){
				prev_phase=PREV_PHASE(ins);
				this_phase=THIS_PHASE(ins);
				this_codec=THIS_CODEC(ins);
				switch(this_phase->type){
					case FROM:	goto from_deadend;
					case INTER:	goto inter_deadend;
					case TO:	goto to_deadend;
				}
			}
		}
	}

	struct bsdconv_phase *last_phase =  LAST_PHASE(ins);
	// output: deliver the last phase's data list as selected by output_mode
	switch(ins->output_mode){
		/* keep the converted data queued; report nothing */
		case BSDCONV_HOLD:
			ins->output.len=0;
			ins->output.flags=0;
			break;
		/*
		 * Allocate (incoming output.len + total data length) bytes, copy the
		 * data to the start of that buffer and set output.len to the data
		 * length. NOTE(review): the malloc() result is not checked.
		 */
		case BSDCONV_AUTOMALLOC:
			i=ins->output.len;
			data_ptr=last_phase->data_head->next;
			while(data_ptr){
				i+=data_ptr->len;
				data_ptr=data_ptr->next;
			}
			last_phase->data_tail=last_phase->data_head;
			ins->output.flags=1;
			ptr=ins->output.data=malloc(i);
			ins->output.len=i-ins->output.len;
			data_ptr=last_phase->data_head;
			while(last_phase->data_head->next){
				data_ptr=last_phase->data_head->next;
				memcpy(ptr, data_ptr->data, data_ptr->len);
				ptr+=data_ptr->len;
				last_phase->data_head->next=last_phase->data_head->next->next;
				DATUM_FREE(ins, data_ptr);
			}
			break;
		/*
		 * Caller-supplied buffer: copy as many whole data as fit and set
		 * output.len to the number of bytes copied. Without a buffer, only
		 * report the summed length of the list (the sum starts at the head
		 * node itself) and leave the data queued.
		 */
		case BSDCONV_PREMALLOCED:
			ins->output.flags=0;
			if(ins->output.data!=NULL && ins->output.len){
				i=0;
				while(last_phase->data_head->next && last_phase->data_head->next->len<=ins->output.len-i){
					memcpy(ins->output.data+i, last_phase->data_head->next->data, last_phase->data_head->next->len);
					i+=last_phase->data_head->next->len;
					if(last_phase->data_tail==last_phase->data_head->next){
						last_phase->data_tail=last_phase->data_head;
					}
					data_ptr=last_phase->data_head->next;
					last_phase->data_head->next=last_phase->data_head->next->next;
					DATUM_FREE(ins, data_ptr);
				}
				ins->output.len=i;
			}else{
				i=0;
				data_ptr=last_phase->data_head;
				while(data_ptr){
					i+=data_ptr->len;
					data_ptr=data_ptr->next;
				}
				ins->output.len=i;
			}
			break;
		/* output.data is a FILE *; NOTE(review): fwrite() result is ignored */
		case BSDCONV_FILE:
			fp=ins->output.data;
			while(last_phase->data_head->next){
				data_ptr=last_phase->data_head->next;
				fwrite(data_ptr->data, data_ptr->len, 1, fp);
				last_phase->data_head->next=last_phase->data_head->next->next;
				DATUM_FREE(ins, data_ptr);
			}
			last_phase->data_tail=last_phase->data_head;
			break;
		/* output.data carries a file descriptor; NOTE(review): write() result (errors, short writes) is ignored */
		case BSDCONV_FD:
			fd=(intptr_t)ins->output.data;
			while(last_phase->data_head->next){
				data_ptr=last_phase->data_head->next;
				write(fd, data_ptr->data, data_ptr->len);
				last_phase->data_head->next=last_phase->data_head->next->next;
				DATUM_FREE(ins, data_ptr);
			}
			last_phase->data_tail=last_phase->data_head;
			break;
		/* discard the converted data */
		case BSDCONV_NULL:
			while(last_phase->data_head->next){
				data_ptr=last_phase->data_head->next;
				last_phase->data_head->next=last_phase->data_head->next->next;
				DATUM_FREE(ins, data_ptr);
			}
			last_phase->data_tail=last_phase->data_head;
			break;
		/*
		 * output.data is another instance: the first datum is copied by value
		 * into its input slot (F_FREE is cleared on the source node first so
		 * releasing that node leaves the payload alone), and the remaining
		 * data are duplicated with dup_data_rt() and chained behind it.
		 */
		case BSDCONV_PASS:
			inso=ins->output.data;
			if(last_phase->data_head->next){
				inso->input=*(last_phase->data_head->next);
				data_ptr=last_phase->data_head->next->next;
				last_phase->data_head->next->flags &= ~F_FREE;
				DATUM_FREE(ins, last_phase->data_head->next);
				last_phase->data_head->next=data_ptr;
			}
			struct data_rt *tail;
			tail=&inso->input;
			while(last_phase->data_head->next){
				tail->next=dup_data_rt(inso, last_phase->data_head->next);
				tail=tail->next;
				data_ptr=last_phase->data_head->next->next;
				DATUM_FREE(ins, last_phase->data_head->next);
				last_phase->data_head->next=data_ptr;
			}
			last_phase->data_tail=last_phase->data_head;
			break;
	}
	return;
}
1118 
bsdconv_error(void)1119 char * bsdconv_error(void){
1120 	switch(GetLastError()){
1121 		case EDOOFUS:
1122 				return strdup("Unexpected condition");
1123 		case EOPNOTSUPP:
1124 				return strdup("Unsupported charset/encoding or filter");
1125 		case ENOMEM:
1126 				return strdup("Mmap failed");
1127 		case EINVAL:
1128 				return strdup("Conversion syntax error");
1129 		default:
1130 				return strdup("Unknown error");
1131 	}
1132 }
1133