1 /*
2 * Reference:
3 * http://icu-project.org/docs/papers/gb18030.html
4 * http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
5 */
6
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "../../src/bsdconv.h"
10
/* Per-codec decoder state carried between cbconv() calls. */
struct my_s
{
	/* Position inside the four-byte sequence being assembled (0..3). */
	int status;

	/*
	 * Running accumulator for the sequence. Despite the name it holds the
	 * linearized byte value built up by cbconv(), not a Unicode code point.
	 */
	uint32_t ucs;
};
15
cbcreate(struct bsdconv_instance * ins,struct bsdconv_hash_entry * arg)16 int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
17 struct my_s *r=malloc(sizeof(struct my_s));
18 THIS_CODEC(ins)->priv=r;
19 return 0;
20 }
21
cbinit(struct bsdconv_instance * ins)22 void cbinit(struct bsdconv_instance *ins){
23 struct my_s *r=THIS_CODEC(ins)->priv;
24 r->status=0;
25 }
26
cbdestroy(struct bsdconv_instance * ins)27 void cbdestroy(struct bsdconv_instance *ins){
28 struct my_s *r=THIS_CODEC(ins)->priv;
29 free(r);
30 }
31
/*
 * Give up on the current sequence: mark the phase state as DEADEND, reset
 * the decoder position and return from the enclosing void function.
 *
 * Expects `this_phase` and `t` to be in scope at the call site. The bare
 * DEADEND inside the body is not followed by '(' and therefore is not
 * re-expanded as this function-like macro.
 *
 * The expansion deliberately does not end in ';' so that `DEADEND();`
 * behaves as exactly one statement, including in an unbraced if/else.
 */
#define DEADEND() do{ \
	this_phase->state.status=DEADEND; \
	t->status=0; \
	return; \
}while(0)
37
/*
 * One contiguous run of four-byte sequences that maps linearly onto a run
 * of code points.
 */
struct gb18030_data {
	uint32_t beg;	/* first linearized value of the run (inclusive) */
	uint32_t end;	/* last linearized value of the run (inclusive) */
	uint32_t off;	/* code point assigned to beg; the rest follow in order */
};

/*
 * Runs sorted by ascending, non-overlapping [beg, end] so that the table
 * can be binary-searched. beg/end are expressed in the same units as the
 * accumulator built by cbconv(): ((b1*10 + b2)*126 + b3)*10 + b4 over the
 * raw byte values.
 */
static const struct gb18030_data gb18030_table[] = {
	{ .beg = 1688038, .end = 1695139, .off = 0x0452 },
	{ .beg = 1696437, .end = 1698546, .off = 0x2643 },
	{ .beg = 1700191, .end = 1700955, .off = 0x361B },
	{ .beg = 1701916, .end = 1702800, .off = 0x3CE1 },
	{ .beg = 1703065, .end = 1703535, .off = 0x4160 },
	{ .beg = 1703947, .end = 1704319, .off = 0x44D7 },
	{ .beg = 1704636, .end = 1705076, .off = 0x478E },
	{ .beg = 1705179, .end = 1705881, .off = 0x49B8 },
	{ .beg = 1706261, .end = 1720686, .off = 0x9FA6 },
	{ .beg = 1720768, .end = 1725062, .off = 0xE865 },
	{ .beg = 1725296, .end = 1726325, .off = 0xFA2A },
	{ .beg = 1726612, .end = 1726637, .off = 0xFFE6 },
	{ .beg = 1876218, .end = 2924793, .off = 0x10000 },
};
59
cbconv(struct bsdconv_instance * ins)60 void cbconv(struct bsdconv_instance *ins){
61 struct bsdconv_phase *this_phase=THIS_PHASE(ins);
62 struct my_s *t=THIS_CODEC(ins)->priv;
63 unsigned char d;
64 unsigned char *c;
65 struct data_st data;
66 int max=sizeof(gb18030_table) / sizeof(struct gb18030_data) - 1;
67 int min = 0;
68 int mid;
69 int i;
70 ucs_t ucs;
71
72 for(;this_phase->i<this_phase->curr->len;this_phase->i+=1){
73 d=UCP(this_phase->curr->data)[this_phase->i];
74 memcpy(&data, (char *)(this_phase->codec[this_phase->index].data_z+(uintptr_t)this_phase->state.data), sizeof(struct data_st));
75 c=UCP(this_phase->codec[this_phase->index].data_z+de_offset(data.data));
76 next:
77 switch(t->status){
78 case 0:
79 if(t->status<data.len){
80 t->ucs=c[0]*10;
81 t->status=1;
82 goto next;
83 }
84 t->ucs=d*10;
85 t->status=1;
86 break;
87 case 1:
88 if(t->status<data.len){
89 t->ucs+=c[1];
90 t->ucs*=126;
91 t->status=2;
92 goto next;
93 }
94 t->ucs+=d;
95 t->ucs*=126;
96 t->status=2;
97 break;
98 case 2:
99 if(t->status<data.len){
100 t->ucs+=c[2];
101 t->ucs*=10;
102 t->status=3;
103 goto next;
104 }
105 t->ucs+=d;
106 t->ucs*=10;
107 t->status=3;
108 break;
109 case 3:
110 if(t->status<data.len){
111 t->ucs+=c[3];
112 t->status=0;
113 goto next;
114 }
115 t->ucs+=d;
116 t->status=0;
117 if (t->ucs < gb18030_table[0].beg || t->ucs > gb18030_table[max].end){
118 DEADEND();
119 }else while (max >= min) {
120 mid = (min + max) / 2;
121 if (t->ucs > gb18030_table[mid].end)
122 min = mid + 1;
123 else if (t->ucs < gb18030_table[mid].beg)
124 max = mid - 1;
125 else{
126 break;
127 }
128 }
129 if(gb18030_table[mid].beg<=t->ucs && t->ucs<=gb18030_table[mid].end){
130 ucs.ucs4=htobe32(gb18030_table[mid].off + (t->ucs - gb18030_table[mid].beg));
131 for(i=0;ucs.byte[i]==0 && i<4;++i);
132 DATA_MALLOC(ins, this_phase->data_tail->next);
133 this_phase->data_tail=this_phase->data_tail->next;
134 this_phase->data_tail->next=NULL;
135 this_phase->data_tail->len=5 - i;
136 this_phase->data_tail->data=c=malloc(5 - i);
137 this_phase->data_tail->flags=F_FREE;
138 this_phase->state.status=NEXTPHASE;
139 *c=0x01;
140 c+=1;
141 for(;i<4;++i,c+=1){
142 *c=ucs.byte[i];
143 }
144 return;
145 }else{
146 DEADEND();
147 }
148 break;
149 default:
150 DEADEND();
151 }
152 }
153 this_phase->state.status=CONTINUE;
154 return;
155 }
156