#define USE_HEX_MAP

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <sys/param.h>
#include <sys/types.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <dirent.h>
#include "bsdconv.h"
#ifdef WIN32
#include <windows.h>
#else
#include <fcntl.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/mman.h>
#endif
21
/*
 * MAP_PREFAULT_READ is OR-ed into the mmap() flags in _loadcodec().
 * Where the platform headers do not provide it, define it as 0 so that
 * it contributes nothing to the flag mask.
 */
#ifndef MAP_PREFAULT_READ
#define MAP_PREFAULT_READ 0
#endif

/*
 * Directory that loadcodec() enters (relative to the bsdconv base path)
 * before descending into the per-type "from"/"inter"/"to" directory.
 */
#ifdef WIN32
#define MODULES_SUBPATH "modules"
#else
#define MODULES_SUBPATH "share/bsdconv"
#endif

/* Declared ahead of the included sources below; both are defined later in this file. */
struct bsdconv_instance *bsdconv_unpack(const char *);
char *bsdconv_pack(struct bsdconv_instance *);

/* These sources are textually included, so they are compiled as part of this translation unit. */
#include "libbsdconv_counter.c"
#include "libbsdconv_filter.c"
#include "libbsdconv_scorer.c"
#include "libbsdconv_hash.c"
#include "libbsdconv_module.c"
#include "libbsdconv_util.c"
41
_cbcreate(struct bsdconv_instance * ins,int p,int c)42 static inline int _cbcreate(struct bsdconv_instance *ins, int p, int c){
43 int r;
44 char *argv;
45 if(ins->phase[p].codec[c].argv)
46 argv=strdup(ins->phase[p].codec[c].argv);
47 else
48 argv=strdup("");
49 char *cur=argv;
50 char *k;
51 struct bsdconv_hash_entry *arg=NULL, *tmp;
52 struct bsdconv_hash_entry **last=&arg;
53 if(*cur){
54 while((k=strsep(&cur, "&"))!=NULL){
55 *last=malloc(sizeof(struct bsdconv_hash_entry));
56 (*last)->key=k;
57 (*last)->ptr=strchr(k, '=');
58 if((*last)->ptr){
59 *CP((*last)->ptr)=0;
60 (*last)->ptr+=1;
61 }
62 (*last)->next=NULL;
63 last=&((*last)->next);
64 }
65 }
66 r=ins->phase[p].codec[c].cbcreate(ins, arg);
67 free(argv);
68 while(arg){
69 tmp=arg->next;
70 free(arg);
71 arg=tmp;
72 }
73 return r;
74 }
75
_loadcodec(struct bsdconv_codec * cd,char * path)76 int _loadcodec(struct bsdconv_codec *cd, char *path){
77 #ifdef WIN32
78 if ((cd->fd=CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL))==INVALID_HANDLE_VALUE){
79 SetLastError(EOPNOTSUPP);
80 return 0;
81 }
82 cd->md=CreateFileMapping(cd->fd, NULL, PAGE_READONLY, 0,0, NULL);
83 if(!cd->md){
84 CloseHandle(cd->fd);
85 SetLastError(ENOMEM);
86 return 0;
87 }
88 cd->data_z=cd->z=MapViewOfFile(cd->md, FILE_MAP_READ, 0,0,0);
89 if(!cd->z){
90 CloseHandle(cd->md);
91 CloseHandle(cd->fd);
92 SetLastError(ENOMEM);
93 return 0;
94 }
95 #else
96 struct stat stat;
97 if((cd->fd=open(path, O_RDONLY))==-1){
98 SetLastError(EOPNOTSUPP);
99 return 0;
100 }
101 fstat(cd->fd, &stat);
102 cd->maplen=stat.st_size;
103 if((cd->data_z=cd->z=mmap(0, stat.st_size, PROT_READ, MAP_PRIVATE | MAP_PREFAULT_READ, cd->fd, 0))==MAP_FAILED){
104 close(cd->fd);
105 SetLastError(ENOMEM);
106 return 0;
107 }
108 #endif
109
110 cd->dl=NULL;
111 cd->cbcreate=NULL;
112 cd->cbinit=NULL;
113 cd->cbctl=NULL;
114 cd->cbconv=NULL;
115 cd->cbflush=NULL;
116 cd->cbdestroy=NULL;
117 strcat(path, "." SHLIBEXT);
118
119 if((cd->dl=OPEN_SHAREOBJECT(path))){
120 cd->cbconv=SHAREOBJECT_SYMBOL(cd->dl,"cbconv");
121 cd->cbflush=SHAREOBJECT_SYMBOL(cd->dl,"cbflush");
122 cd->cbcreate=SHAREOBJECT_SYMBOL(cd->dl,"cbcreate");
123 cd->cbinit=SHAREOBJECT_SYMBOL(cd->dl,"cbinit");
124 cd->cbctl=SHAREOBJECT_SYMBOL(cd->dl,"cbctl");
125 cd->cbdestroy=SHAREOBJECT_SYMBOL(cd->dl,"cbdestroy");
126 }
127
128 return 1;
129 }
130
loadcodec(struct bsdconv_codec * cd,int type)131 int loadcodec(struct bsdconv_codec *cd, int type){
132 char *cwd;
133 char *c;
134 char buf[PATH_MAX+1];
135 char *upper;
136 cwd=getcwd(NULL, 0);
137 if((c=getenv("BSDCONV_PATH"))){
138 chdir(c);
139 }else{
140 chdir(BSDCONV_PATH);
141 }
142 chdir(MODULES_SUBPATH);
143 switch(type){
144 case FROM:
145 chdir("from");
146 break;
147 case INTER:
148 chdir("inter");
149 break;
150 case TO:
151 chdir("to");
152 break;
153 }
154 upper=strdup(cd->desc);
155 strtoupper(upper);
156 REALPATH(upper, buf);
157 chdir(cwd);
158 free(cwd);
159 free(upper);
160 if(!_loadcodec(cd, buf))
161 return 0;
162 return 1;
163 }
164
unloadcodec(struct bsdconv_codec * cd)165 void unloadcodec(struct bsdconv_codec *cd){
166 if(cd->dl){
167 CLOSE_SHAREOBJECT(cd->dl);
168 }
169 #ifdef WIN32
170 UnmapViewOfFile(cd->z);
171 CloseHandle(cd->md);
172 CloseHandle(cd->fd);
173 #else
174 munmap(cd->z, cd->maplen);
175 close(cd->fd);
176 #endif
177 }
178
bsdconv_init(struct bsdconv_instance * ins)179 void bsdconv_init(struct bsdconv_instance *ins){
180 int i, j;
181 struct data_rt *data_ptr;
182
183 ins->flush=0;
184 ins->input.data=NULL;
185 ins->input.flags=0;
186 ins->input.len=0;
187 ins->output.data=NULL;
188 ins->output.len=0;
189 ins->output_mode=BSDCONV_HOLD;
190
191 for(i=0;i<=ins->phasen;++i){
192 ins->phase_index=i;
193 ins->phase[i].flags=0;
194 ins->phase[i].offset=0;
195 while(ins->phase[i].data_head->next){
196 data_ptr=ins->phase[i].data_head->next;
197 ins->phase[i].data_head->next=ins->phase[i].data_head->next->next;
198 if(data_ptr->flags & F_FREE)
199 free(data_ptr->data);
200 free(data_ptr);
201 }
202 ins->phase[i].data_tail=ins->phase[i].data_head;
203 ins->phase[i].data_head->len=0;
204 ins->phase[i].match_data=NULL;
205 if(i>0){
206 ins->phase[i].curr=ins->phase[i-1].data_head;
207 for(j=0;j<=ins->phase[i].codecn;++j){
208 ins->phase[i].index=j;
209 if(ins->phase[i].codec[j].cbinit)
210 ins->phase[i].codec[j].cbinit(ins);
211 }
212 RESET(i);
213 }
214 }
215 }
216
bsdconv_ctl(struct bsdconv_instance * ins,int ctl,void * p,int v)217 void bsdconv_ctl(struct bsdconv_instance *ins, int ctl, void *p, int v){
218 int i, j;
219 for(i=1;i<=ins->phasen;++i){
220 for(j=0;j<=ins->phase[i].codecn;++j){
221 if(ins->phase[i].codec[j].cbctl){
222 ins->phase_index=i;
223 ins->phase[i].index=j;
224 ins->phase[i].codec[j].cbctl(ins, ctl, p, v);
225 }
226 }
227 }
228 }
229
bsdconv_pack(struct bsdconv_instance * ins)230 char *bsdconv_pack(struct bsdconv_instance *ins){
231 char *ret;
232 char *t, *cur;
233 const char *end;
234 int len=0;
235 int i, j, n;
236 for(i=1;i<=ins->phasen;++i){
237 for(j=0;j<=ins->phase[i].codecn;++j){
238 len+=strlen(ins->phase[i].codec[j].desc);
239 t=ins->phase[i].codec[j].desc;
240 n=1;
241 while(*t){
242 if(*t==',')
243 n+=1;
244 t++;
245 }
246 if(ins->phase[i].codec[j].argv)
247 len+=(strlen(ins->phase[i].codec[j].argv)+1)*n;
248 len+=1;
249 }
250 }
251 ret=malloc(sizeof(char) * len);
252 ret[0]=0;
253 for(i=1;i<=ins->phasen;++i){
254 for(j=0;j<=ins->phase[i].codecn;++j){
255 if(j==0){
256 if(i>1){
257 switch(ins->phase[i].type){
258 case FROM:
259 strcat(ret, "|");
260 break;
261 case INTER:
262 case TO:
263 strcat(ret, ":");
264 break;
265 }
266 }
267 }else{
268 strcat(ret, ",");
269 }
270 t=ins->phase[i].codec[j].desc;
271 while(1){
272 cur=strchr(t, ',');
273 if(cur){
274 end=",";
275 *cur=0;
276 }else{
277 end="";
278 }
279 strcat(ret, t);
280 if(ins->phase[i].codec[j].argv && *(ins->phase[i].codec[j].argv)){
281 if(strchr(t,'#')==NULL)
282 strcat(ret, "#");
283 else
284 strcat(ret, "&");
285 strcat(ret, ins->phase[i].codec[j].argv);
286 }
287 strcat(ret, end);
288 if(cur)
289 t=cur+1;
290 else
291 break;
292 }
293 }
294 }
295 return ret;
296 }
297
bsdconv_unpack(const char * _conversion)298 struct bsdconv_instance *bsdconv_unpack(const char *_conversion){
299 struct bsdconv_instance *ins=malloc(sizeof(struct bsdconv_instance));
300 char *conversion;
301 char *t, *t1;
302 int i, j;
303 int f=0;
304
305 ins->pool=NULL;
306 ins->hash=NULL;
307 ins->counter=NULL;
308 ins->input.flags=0;
309 ins->output.flags=0;
310
311 conversion=strdup(_conversion);
312 t1=t=conversion;
313 i=1;
314 for(t=(char *)conversion;*t;t++){
315 if(*t==':' || *t=='|')++i;
316 }
317 ins->phasen=i;
318 char *phase_off[i+1];
319
320 ins->phase=malloc(sizeof(struct bsdconv_phase) * (i+1));
321
322 i=1;
323 t1=t=conversion;
324 while((t1=strsep(&t, "|")) != NULL){
325 if(f>1){
326 ins->phase[i-f].type=FROM;
327 ins->phase[i-1].type=TO;
328 }
329 f=0;
330 while((phase_off[i]=strsep(&t1, ":"))!=NULL){
331 ins->phase[i].type=INTER;
332 i+=1;
333 f+=1;
334 }
335 }
336 if(f>1){
337 ins->phase[i-f].type=FROM;
338 ins->phase[i-1].type=TO;
339 }
340 ins->phase[0].type=_INPUT;
341
342 for(i=1;i<=ins->phasen;++i){
343 if(*phase_off[i]){
344 ins->phase[i].codecn = 0;
345 for(t=(char *)phase_off[i];*t;t++){
346 if(*t==','){
347 ins->phase[i].codecn+=1;
348 }
349 }
350 }else{ // empty phase
351 free(ins->phase);
352 free(ins);
353 free(conversion);
354 return NULL;
355 }
356 }
357 for(i=1;i<=ins->phasen;++i){
358 ins->phase[i].codec=malloc((ins->phase[i].codecn + 1)* sizeof(struct bsdconv_codec));
359 }
360 for(i=1;i<=ins->phasen;++i){
361 t=phase_off[i];
362 for(j=0;j<=ins->phase[i].codecn;++j){
363 ins->phase[i].codec[j].desc=strdup(strsep(&t, ","));
364 ins->phase[i].codec[j].argv=strchr(ins->phase[i].codec[j].desc, '#');
365 if(ins->phase[i].codec[j].argv){
366 *(ins->phase[i].codec[j].argv)=0;
367 ins->phase[i].codec[j].argv+=1;
368 }
369 if(ins->phase[i].codec[j].desc[0]==0){
370 for(;j>=0;--j){
371 free(ins->phase[i].codec[j].desc);
372 }
373 for(i=1;i<=ins->phasen;++i){
374 free(ins->phase[i].codec);
375 }
376 free(ins->phase);
377 free(ins);
378 free(conversion);
379 return NULL;
380 }
381 }
382 }
383 free(conversion);
384 return ins;
385 }
386
bsdconv_create(const char * _conversion)387 struct bsdconv_instance *bsdconv_create(const char *_conversion){
388 int e=0;
389 struct bsdconv_instance *ins=NULL;
390 char *conversion=malloc(strlen(_conversion)+1);
391 int i, j;
392 char *c;
393 const char *d;
394 char whitespace[256]={0};
395 whitespace['\r']=1;
396 whitespace['\n']=1;
397 whitespace['\t']=1;
398 whitespace['\f']=1;
399 whitespace[' ']=1;
400 d=_conversion;
401 c=conversion;
402 while(*d){
403 if(whitespace[*UCP(d)]==0){
404 *c=*d;
405 c+=1;
406 }
407 d+=1;
408 }
409 *c=0;
410
411 i=0;
412 while(i==0 || i<=ins->phasen){
413 start_parse:
414 ins=bsdconv_unpack(conversion);
415 if(ins==NULL){
416 free(conversion);
417 SetLastError(EINVAL);
418 return NULL;
419 }
420 for(i=1;i<=ins->phasen;++i){
421 for(j=0;j<=ins->phase[i].codecn;++j){
422 if(!bsdconv_module_check(ins->phase[i].type, ins->phase[i].codec[j].desc)){
423 if(bsdconv_module_vital(ins->phase[i].type, ins->phase[i].codec[j].desc)){
424 printf("error %s\n", ins->phase[i].codec[j].desc);
425 free(conversion);
426 bsdconv_destroy(ins);
427 SetLastError(EDOOFUS);
428 return NULL;
429 }
430 c=bsdconv_solve_alias(ins->phase[i].type, ins->phase[i].codec[j].desc);
431 if(c==NULL){
432 e=1;
433 }else{
434 if(strcmp(c, ins->phase[i].codec[j].desc)==0)
435 e=1;
436 free(ins->phase[i].codec[j].desc);
437 ins->phase[i].codec[j].desc=c;
438 }
439 free(conversion);
440 conversion=bsdconv_pack(ins);
441 for(i=1;i<=ins->phasen;++i){
442 for(j=0;j<=ins->phase[i].codecn;++j){
443 free(ins->phase[i].codec[j].desc);
444 }
445 free(ins->phase[i].codec);
446 }
447 free(ins->phase);
448 free(ins);
449 if(e){
450 SetLastError(EOPNOTSUPP);
451 free(conversion);
452 return NULL;
453 }
454 goto start_parse;
455 }
456 }
457 }
458 }
459 for(i=1;i<=ins->phasen;++i){
460 for(j=0;j<=ins->phase[i].codecn;++j){
461 if(!loadcodec(&ins->phase[i].codec[j], ins->phase[i].type)){
462 free(ins->phase[i].codec[j].desc);
463 j-=1;
464 for(;i>=1;j=ins->phase[--i].codecn){
465 for(;j>=0;--j){
466 free(ins->phase[i].codec[j].desc);
467 unloadcodec(&ins->phase[i].codec[j]);
468 }
469 }
470 goto bsdconv_create_error;
471 }
472 }
473 }
474
475 ins->ierr=bsdconv_counter(ins, "IERR");
476 ins->oerr=bsdconv_counter(ins, "OERR");
477
478 for(i=1;i<=ins->phasen;++i){
479 for(j=0;j<=ins->phase[i].codecn;++j){
480 if(ins->phase[i].codec[j].cbcreate){
481 ins->phase_index=i;
482 ins->phase[i].index=j;
483 e=_cbcreate(ins, i, j);
484 if(e){
485 for(j=j-1;j>=0;j-=1){
486 if(ins->phase[i].codec[j].cbdestroy){
487 ins->phase_index=i;
488 ins->phase[i].index=j;
489 ins->phase[i].codec[j].cbdestroy(ins);
490 }
491 }
492 for(i=i-1;i>=1;i-=1){
493 for(j=0;j<=ins->phase[i].codecn;++j){
494 if(ins->phase[i].codec[j].cbdestroy){
495 ins->phase_index=i;
496 ins->phase[i].index=j;
497 ins->phase[i].codec[j].cbdestroy(ins);
498 }
499 }
500 }
501 for(i=1;i<=ins->phasen;++i){
502 for(j=0;j<=ins->phase[i].codecn;++j){
503 free(ins->phase[i].codec[j].desc);
504 unloadcodec(&ins->phase[i].codec[j]);
505 }
506 }
507 SetLastError(e);
508 goto bsdconv_create_error;
509 }
510 }
511 }
512 }
513 for(i=0;i<=ins->phasen;++i){
514 ins->phase[i].data_head=malloc(sizeof(struct data_rt));
515 ins->phase[i].data_head->next=NULL;
516 ins->phase[i].data_head->flags=0;
517 }
518
519 free(conversion);
520 return ins;
521
522 bsdconv_create_error:
523 for(i=1;i<=ins->phasen;++i){
524 free(ins->phase[i].codec);
525 }
526
527 free(conversion);
528 free(ins->phase);
529
530 void *p;
531 while(ins->hash){
532 free(ins->hash->key);
533 p=ins->hash->next;
534 free(ins->hash);
535 ins->hash=p;
536 }
537 while(ins->counter){
538 free(ins->counter->key);
539 p=ins->counter->next;
540 free(ins->counter);
541 ins->counter=p;
542 }
543 free(ins);
544 return NULL;
545 }
546
bsdconv_destroy(struct bsdconv_instance * ins)547 void bsdconv_destroy(struct bsdconv_instance *ins){
548 int i, j;
549 struct data_rt *data_ptr;
550 void *p;
551
552 for(i=0;i<=ins->phasen;++i){
553 if(i>0){
554 for(j=0;j<=ins->phase[i].codecn;++j){
555 free(ins->phase[i].codec[j].desc);
556 if(ins->phase[i].codec[j].cbdestroy){
557 ins->phase_index=i;
558 ins->phase[i].index=j;
559 ins->phase[i].codec[j].cbdestroy(ins);
560 }
561 unloadcodec(&ins->phase[i].codec[j]);
562 }
563 free(ins->phase[i].codec);
564 }
565 while(ins->phase[i].data_head){
566 data_ptr=ins->phase[i].data_head;
567 ins->phase[i].data_head=ins->phase[i].data_head->next;
568 if(data_ptr->flags & F_FREE)
569 free(data_ptr->data);
570 free(data_ptr);
571 }
572 }
573 while(ins->pool){
574 data_ptr=ins->pool;
575 ins->pool=ins->pool->next;
576 free(data_ptr);
577 }
578 free(ins->phase);
579 while(ins->hash){
580 free(ins->hash->key);
581 p=ins->hash->next;
582 free(ins->hash);
583 ins->hash=p;
584 }
585 while(ins->counter){
586 free(ins->counter->key);
587 p=ins->counter->next;
588 free(ins->counter);
589 ins->counter=p;
590 }
591 free(ins);
592 }
593
bsdconv(struct bsdconv_instance * ins)594 void bsdconv(struct bsdconv_instance *ins){
595 struct bsdconv_instance *inso;
596 uintptr_t i;
597 struct data_rt *data_ptr;
598 char *ptr;
599 FILE *fp;
600 int fd;
601 unsigned char c;
602 struct bsdconv_phase *prev_phase;
603 struct bsdconv_phase *this_phase;
604 struct bsdconv_codec *this_codec;
605
606 if(ins->input.data!=NULL){
607 DATA_MALLOC(ins, ins->phase[0].data_tail->next);
608 ins->phase[0].data_tail=ins->phase[0].data_tail->next;
609 *(ins->phase[0].data_tail)=ins->input;
610 ins->input.data=NULL;
611 ins->input.len=0;
612 ins->input.flags=0;
613 }
614
615 ins->phase_index=1;
616
617 phase_begin:
618 if(ins->phase_index>0 && ins->phase_index<=ins->phasen){
619 prev_phase=PREV_PHASE(ins);
620 this_phase=THIS_PHASE(ins);
621 this_codec=THIS_CODEC(ins);
622 switch(this_phase->type){
623 case FROM:
624 while(this_phase->curr->next){
625 if(this_phase->curr == prev_phase->data_head) this_phase->i=this_phase->data_head->len;
626 else this_phase->i=0;
627 this_phase->curr=this_phase->curr->next;
628 while(this_phase->i<this_phase->curr->len){
629 c=UCP(this_phase->curr->data)[this_phase->i];
630 offset_t next = get_offset(this_codec, &this_phase->state, c);
631 if(next){
632 this_phase->offset = next;
633 }else if(!(this_phase->flags & F_LOOPBACK)){
634 this_phase->offset=0;
635 }
636 this_phase->state=read_state(this_codec, this_phase->offset);
637 from_x:
638 switch(this_phase->state.status){
639 case DEADEND:
640 from_deadend:
641 this_phase->flags &= ~(F_PENDING | F_LOOPBACK);
642 if(this_phase->flags & F_MATCH){
643 if(this_phase->match_data){
644 LISTCPY_ST(ins, this_phase->data_tail, this_phase->match_data, this_codec->data_z);
645
646 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
647 this_phase->curr=prev_phase->data_head;
648 this_phase->i=this_phase->data_head->len;
649 }else if(this_codec->cbflush){
650 this_codec->cbflush(ins);
651 }
652 this_phase->flags &= ~F_MATCH;
653 RESET(ins->phase_index);
654 goto phase_begin;
655 }else if(this_phase->index < this_phase->codecn){
656 this_phase->index++;
657 this_codec=THIS_CODEC(ins);
658
659 this_phase->state=read_state(this_codec, 0);
660
661 this_phase->curr=prev_phase->data_head;
662 this_phase->i=this_phase->data_head->len;
663 continue;
664 }else{
665 *(ins->ierr)+=1;
666
667 RESET(ins->phase_index);
668 this_codec=THIS_CODEC(ins);
669
670 this_phase->bak=this_phase->curr;
671 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
672 this_phase->bak=this_phase->curr=prev_phase->data_head;
673 this_phase->i=this_phase->data_head->len=this_phase->data_head->len+1;
674 continue;
675 }
676 break;
677 case MATCH:
678 this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
679 this_phase->match_data=NULL;
680
681 LISTCPY_ST(ins, this_phase->data_tail, this_phase->state.data, this_codec->data_z);
682
683 this_phase->bak=this_phase->curr;
684 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
685 this_phase->curr=prev_phase->data_head;
686 this_phase->data_head->len=this_phase->i+1;
687
688 RESET(ins->phase_index);
689
690 ins->phase_index+=1;
691 goto phase_begin;
692 case SUBMATCH:
693 this_phase->flags |= (F_MATCH | F_PENDING);
694 this_phase->match_data=this_phase->state.data;
695
696 this_phase->bak=this_phase->curr;
697 this_phase->data_head->len=this_phase->i+1;
698 break;
699 case SUBROUTINE:
700 case SUBMATCH_SUBROUTINE:
701 this_codec->cbconv(ins);
702 this_phase->flags |= F_LOOPBACK;
703
704 goto from_x;
705 case NEXTPHASE:
706 this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
707 this_phase->match_data=NULL;
708
709 this_phase->bak=this_phase->curr;
710 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
711 this_phase->curr=prev_phase->data_head;
712 this_phase->data_head->len=this_phase->i+1;
713
714 RESET(ins->phase_index);
715
716 ins->phase_index+=1;
717 goto phase_begin;
718 case CONTINUE:
719 this_phase->flags |= F_PENDING;
720 break;
721 case NOOP:
722 goto phase_begin;
723 }
724 this_phase->i+=1;
725 }
726 }
727 break;
728
729 case INTER:
730 while(this_phase->curr->next){
731 this_phase->curr=this_phase->curr->next;
732 this_phase->state.status=NOMATCH;
733 for(this_phase->i=0;this_phase->i<this_phase->curr->len;this_phase->i+=1){
734 c=UCP(this_phase->curr->data)[this_phase->i];
735 offset_t next = get_offset(this_codec, &this_phase->state, c);
736 if(next){
737 this_phase->offset = next;
738 }else if(!(this_phase->flags & F_LOOPBACK)){
739 this_phase->offset=0;
740 }
741 this_phase->state=read_state(this_codec, this_phase->offset);
742 switch(this_phase->state.status){
743 case DEADEND:
744 goto inter_deadend;
745 break;
746 case SUBROUTINE:
747 case SUBMATCH_SUBROUTINE:
748 this_phase->flags |= F_LOOPBACK;
749 break;
750 }
751 }
752 inter_x:
753 switch(this_phase->state.status){
754 case NOMATCH:
755 ins->phase_index+=1;
756 goto phase_begin;
757 case DEADEND:
758 inter_deadend:
759 this_phase->flags &= ~(F_PENDING | F_LOOPBACK);
760 if(this_phase->flags & F_MATCH){
761 if(this_phase->match_data){
762 LISTCPY_ST(ins, this_phase->data_tail, this_phase->match_data, this_codec->data_z);
763
764 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
765 this_phase->curr=prev_phase->data_head;
766 }else if(this_codec->cbflush){
767 this_codec->cbflush(ins);
768 }
769
770 this_phase->flags &= ~F_MATCH;
771 RESET(ins->phase_index);
772 goto phase_begin;
773 }else if(this_phase->index < this_phase->codecn){
774 this_phase->index++;
775 this_codec=THIS_CODEC(ins);
776
777 this_phase->state=read_state(this_codec, 0);
778
779 this_phase->curr=prev_phase->data_head;
780 continue;
781 }else{
782 data_ptr=prev_phase->data_head->next;
783 prev_phase->data_head->next=prev_phase->data_head->next->next;
784 this_phase->curr=prev_phase->data_head;
785 data_ptr->next=NULL;
786 this_phase->data_tail->next=data_ptr;
787 this_phase->data_tail=data_ptr;
788 if(prev_phase->data_tail==data_ptr){
789 prev_phase->data_tail=prev_phase->data_head;
790 }
791
792 RESET(ins->phase_index);
793
794 ins->phase_index+=1;
795 goto phase_begin;
796 }
797 break;
798 case MATCH:
799 this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
800 this_phase->match_data=NULL;
801
802 LISTCPY_ST(ins, this_phase->data_tail, this_phase->state.data, this_codec->data_z);
803
804 this_phase->bak=this_phase->curr->next;
805 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
806 this_phase->curr=prev_phase->data_head;
807
808 RESET(ins->phase_index);
809
810 ins->phase_index+=1;
811 goto phase_begin;
812 case SUBMATCH:
813 this_phase->flags |= (F_MATCH | F_PENDING);
814 this_phase->match_data=this_phase->state.data;
815
816 if(this_phase->curr->next){
817 this_phase->bak=this_phase->curr->next;
818 }else{
819 DATA_MALLOC(ins, prev_phase->data_tail->next);
820 this_phase->bak=prev_phase->data_tail->next;
821 prev_phase->data_tail=prev_phase->data_tail->next;
822 prev_phase->data_tail->next=NULL;
823 prev_phase->data_tail->len=0;
824 prev_phase->data_tail->flags=0;
825 }
826
827 break;
828 case SUBROUTINE:
829 case SUBMATCH_SUBROUTINE:
830 this_codec->cbconv(ins);
831 goto inter_x;
832 case NEXTPHASE:
833 this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
834 this_phase->match_data=NULL;
835
836 this_phase->bak=this_phase->curr->next;
837 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
838 this_phase->curr=prev_phase->data_head;
839
840 RESET(ins->phase_index);
841
842 ins->phase_index+=1;
843 goto phase_begin;
844 case CONTINUE:
845 this_phase->flags |= F_PENDING;
846 break;
847 case NOOP:
848 goto phase_begin;
849
850 }
851 offset_t next = get_offset(this_codec, &this_phase->state, 256);
852 if(next){
853 this_phase->offset = next;
854 }else if(!(this_phase->flags & F_LOOPBACK)){
855 this_phase->offset=0;
856 }
857 this_phase->state=read_state(this_codec, this_phase->offset);
858 if(this_phase->state.status==DEADEND){ goto inter_deadend;}
859 }
860 break;
861
862 case TO:
863 while(this_phase->curr->next){
864 this_phase->curr=this_phase->curr->next;
865 this_phase->state.status=NOMATCH;
866 for(this_phase->i=0;this_phase->i<this_phase->curr->len;this_phase->i+=1){
867 c=UCP(this_phase->curr->data)[this_phase->i];
868 offset_t next = get_offset(this_codec, &this_phase->state, c);
869 if(next){
870 this_phase->offset = next;
871 }else if(!(this_phase->flags & F_LOOPBACK)){
872 this_phase->offset=0;
873 }
874 this_phase->state=read_state(this_codec, this_phase->offset);
875 switch(this_phase->state.status){
876 case DEADEND:
877 goto to_deadend;
878 break;
879 case SUBROUTINE:
880 case SUBMATCH_SUBROUTINE:
881 this_phase->flags |= F_LOOPBACK;
882 break;
883 }
884 }
885 to_x:
886 switch(this_phase->state.status){
887 case NOMATCH:
888 continue;
889 case DEADEND:
890 to_deadend:
891 this_phase->flags &= ~(F_PENDING | F_LOOPBACK);
892 if(this_phase->flags & F_MATCH){
893 if(this_phase->match_data){
894 LISTCPY_ST(ins, this_phase->data_tail, this_phase->match_data, this_codec->data_z);
895
896 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
897 this_phase->curr=prev_phase->data_head;
898 }else if(this_codec->cbflush){
899 this_codec->cbflush(ins);
900 }
901
902 this_phase->flags &= ~F_MATCH;
903 RESET(ins->phase_index);
904 ins->phase_index+=1;
905 goto phase_begin;
906 }else if(this_phase->index < this_phase->codecn){
907 this_phase->index++;
908 this_codec=THIS_CODEC(ins);
909
910 this_phase->state=read_state(this_codec, 0);
911
912 this_phase->curr=prev_phase->data_head;
913 continue;
914 }else{
915 *(ins->oerr)+=1;
916
917 RESET(ins->phase_index);
918 this_codec=THIS_CODEC(ins);
919
920 this_phase->bak=this_phase->curr->next;
921 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
922 this_phase->bak=this_phase->curr=prev_phase->data_head;
923
924 continue;
925 }
926 break;
927 case MATCH:
928 this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
929 this_phase->match_data=NULL;
930
931 LISTCPY_ST(ins, this_phase->data_tail, this_phase->state.data, this_codec->data_z);
932
933 this_phase->bak=this_phase->curr->next;
934 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
935 this_phase->curr=prev_phase->data_head;
936
937 RESET(ins->phase_index);
938 ins->phase_index+=1;
939 goto phase_begin;
940 case SUBMATCH:
941 this_phase->flags |= (F_MATCH | F_PENDING);
942 this_phase->match_data=this_phase->state.data;
943 if(this_phase->curr->next){
944 this_phase->bak=this_phase->curr->next;
945 }else{
946 DATA_MALLOC(ins, prev_phase->data_tail->next);
947 this_phase->bak=prev_phase->data_tail->next;
948 prev_phase->data_tail=prev_phase->data_tail->next;
949 prev_phase->data_tail->next=NULL;
950 prev_phase->data_tail->len=0;
951 prev_phase->data_tail->flags=0;
952 }
953
954 break;
955 case SUBROUTINE:
956 case SUBMATCH_SUBROUTINE:
957 this_codec->cbconv(ins);
958 goto to_x;
959 case NEXTPHASE:
960 this_phase->flags &= ~(F_MATCH | F_PENDING | F_LOOPBACK);
961 this_phase->match_data=NULL;
962
963 this_phase->bak=this_phase->curr->next;
964 LISTFREE(ins, prev_phase->data_head, this_phase->bak, prev_phase->data_tail);
965 this_phase->curr=prev_phase->data_head;
966
967 RESET(ins->phase_index);
968
969 ins->phase_index+=1;
970 goto phase_begin;
971 case CONTINUE:
972 this_phase->flags|=F_PENDING;
973 this_phase->flags &= ~F_LOOPBACK;
974 break;
975 case NOOP:
976 goto phase_begin;
977 }
978 offset_t next = get_offset(this_codec, &this_phase->state, 256);
979 if(next){
980 this_phase->offset = next;
981 }else if(!(this_phase->flags & F_LOOPBACK)){
982 this_phase->offset=0;
983 }
984 this_phase->state=read_state(this_codec, this_phase->offset);
985 if(this_phase->state.status==DEADEND){ goto to_deadend;}
986 }
987 break;
988 }
989 ins->phase_index+=1;
990 }
991
992 //check back (phase-loop)
993 for(ins->phase_index=ins->phasen;ins->phase_index>0;ins->phase_index-=1){
994 if(ins->phase[ins->phase_index].curr->next){
995 goto phase_begin;
996 }
997 }
998
999 //flush
1000 if(ins->flush){
1001 for(ins->phase_index=1;ins->phase_index<=ins->phasen;++(ins->phase_index)){
1002 if(THIS_PHASE(ins)->flags & F_PENDING){
1003 prev_phase=PREV_PHASE(ins);
1004 this_phase=THIS_PHASE(ins);
1005 this_codec=THIS_CODEC(ins);
1006 switch(this_phase->type){
1007 case FROM: goto from_deadend;
1008 case INTER: goto inter_deadend;
1009 case TO: goto to_deadend;
1010 }
1011 }
1012 }
1013 }
1014
1015 struct bsdconv_phase *last_phase = LAST_PHASE(ins);
1016 //output
1017 switch(ins->output_mode){
1018 case BSDCONV_HOLD:
1019 ins->output.len=0;
1020 ins->output.flags=0;
1021 break;
1022 case BSDCONV_AUTOMALLOC:
1023 i=ins->output.len;
1024 data_ptr=last_phase->data_head->next;
1025 while(data_ptr){
1026 i+=data_ptr->len;
1027 data_ptr=data_ptr->next;
1028 }
1029 last_phase->data_tail=last_phase->data_head;
1030 ins->output.flags=1;
1031 ptr=ins->output.data=malloc(i);
1032 ins->output.len=i-ins->output.len;
1033 data_ptr=last_phase->data_head;
1034 while(last_phase->data_head->next){
1035 data_ptr=last_phase->data_head->next;
1036 memcpy(ptr, data_ptr->data, data_ptr->len);
1037 ptr+=data_ptr->len;
1038 last_phase->data_head->next=last_phase->data_head->next->next;
1039 DATUM_FREE(ins, data_ptr);
1040 }
1041 break;
1042 case BSDCONV_PREMALLOCED:
1043 ins->output.flags=0;
1044 if(ins->output.data!=NULL && ins->output.len){
1045 i=0;
1046 while(last_phase->data_head->next && last_phase->data_head->next->len<=ins->output.len-i){
1047 memcpy(ins->output.data+i, last_phase->data_head->next->data, last_phase->data_head->next->len);
1048 i+=last_phase->data_head->next->len;
1049 if(last_phase->data_tail==last_phase->data_head->next){
1050 last_phase->data_tail=last_phase->data_head;
1051 }
1052 data_ptr=last_phase->data_head->next;
1053 last_phase->data_head->next=last_phase->data_head->next->next;
1054 DATUM_FREE(ins, data_ptr);
1055 }
1056 ins->output.len=i;
1057 }else{
1058 i=0;
1059 data_ptr=last_phase->data_head;
1060 while(data_ptr){
1061 i+=data_ptr->len;
1062 data_ptr=data_ptr->next;
1063 }
1064 ins->output.len=i;
1065 }
1066 break;
1067 case BSDCONV_FILE:
1068 fp=ins->output.data;
1069 while(last_phase->data_head->next){
1070 data_ptr=last_phase->data_head->next;
1071 fwrite(data_ptr->data, data_ptr->len, 1, fp);
1072 last_phase->data_head->next=last_phase->data_head->next->next;
1073 DATUM_FREE(ins, data_ptr);
1074 }
1075 last_phase->data_tail=last_phase->data_head;
1076 break;
1077 case BSDCONV_FD:
1078 fd=(intptr_t)ins->output.data;
1079 while(last_phase->data_head->next){
1080 data_ptr=last_phase->data_head->next;
1081 write(fd, data_ptr->data, data_ptr->len);
1082 last_phase->data_head->next=last_phase->data_head->next->next;
1083 DATUM_FREE(ins, data_ptr);
1084 }
1085 last_phase->data_tail=last_phase->data_head;
1086 break;
1087 case BSDCONV_NULL:
1088 while(last_phase->data_head->next){
1089 data_ptr=last_phase->data_head->next;
1090 last_phase->data_head->next=last_phase->data_head->next->next;
1091 DATUM_FREE(ins, data_ptr);
1092 }
1093 last_phase->data_tail=last_phase->data_head;
1094 break;
1095 case BSDCONV_PASS:
1096 inso=ins->output.data;
1097 if(last_phase->data_head->next){
1098 inso->input=*(last_phase->data_head->next);
1099 data_ptr=last_phase->data_head->next->next;
1100 last_phase->data_head->next->flags &= ~F_FREE;
1101 DATUM_FREE(ins, last_phase->data_head->next);
1102 last_phase->data_head->next=data_ptr;
1103 }
1104 struct data_rt *tail;
1105 tail=&inso->input;
1106 while(last_phase->data_head->next){
1107 tail->next=dup_data_rt(inso, last_phase->data_head->next);
1108 tail=tail->next;
1109 data_ptr=last_phase->data_head->next->next;
1110 DATUM_FREE(ins, last_phase->data_head->next);
1111 last_phase->data_head->next=data_ptr;
1112 }
1113 last_phase->data_tail=last_phase->data_head;
1114 break;
1115 }
1116 return;
1117 }
1118
bsdconv_error(void)1119 char * bsdconv_error(void){
1120 switch(GetLastError()){
1121 case EDOOFUS:
1122 return strdup("Unexpected condition");
1123 case EOPNOTSUPP:
1124 return strdup("Unsupported charset/encoding or filter");
1125 case ENOMEM:
1126 return strdup("Mmap failed");
1127 case EINVAL:
1128 return strdup("Conversion syntax error");
1129 default:
1130 return strdup("Unknown error");
1131 }
1132 }
1133