1 /*
2  * Reference:
3  * http://icu-project.org/docs/papers/gb18030.html
4  * http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
5 */
6 
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "../../src/bsdconv.h"
10 
/*
 * Decoder state kept in the codec's private pointer between cbconv() calls.
 */
struct my_s {
	/* How many bytes of the current four-byte sequence have been folded
	 * into `ucs` so far; cbconv() moves it through 0, 1, 2, 3 and back to 0. */
	int status;

	/* Running linear value of the sequence being decoded, built as
	 * ((b0*10 + b1)*126 + b2)*10 + b3 from the raw byte values. */
	uint32_t ucs;
};
15 
cbcreate(struct bsdconv_instance * ins,struct bsdconv_hash_entry * arg)16 int cbcreate(struct bsdconv_instance *ins, struct bsdconv_hash_entry *arg){
17 	struct my_s *r=malloc(sizeof(struct my_s));
18 	THIS_CODEC(ins)->priv=r;
19 	return 0;
20 }
21 
cbinit(struct bsdconv_instance * ins)22 void cbinit(struct bsdconv_instance *ins){
23 	struct my_s *r=THIS_CODEC(ins)->priv;
24 	r->status=0;
25 }
26 
cbdestroy(struct bsdconv_instance * ins)27 void cbdestroy(struct bsdconv_instance *ins){
28 	struct my_s *r=THIS_CODEC(ins)->priv;
29 	free(r);
30 }
31 
/*
 * Reject the current input: mark the phase state as DEADEND, reset the
 * decoder's byte counter and return from the enclosing (void) function.
 *
 * Expects `this_phase` and `t` to be in scope at the point of use.
 * The expansion deliberately has no trailing semicolon, so `DEADEND();`
 * behaves as a single statement, including inside an unbraced if/else.
 */
#define DEADEND() do{	\
	this_phase->state.status=DEADEND;	\
	t->status=0;	\
	return;	\
}while(0)
37 
/*
 * One contiguous run of four-byte GB18030 codes that maps linearly onto
 * Unicode code points.
 */
struct gb18030_data {
	/* First linear value of the run (inclusive). */
	uint32_t beg;
	/* Last linear value of the run (inclusive). */
	uint32_t end;
	/* Code point that `beg` maps to; later values map to off + (value - beg). */
	uint32_t off;
};
43 
44 static const struct gb18030_data gb18030_table[] = {
45 	{1688038, 1695139, 0x0452},
46 	{1696437, 1698546, 0x2643},
47 	{1700191, 1700955, 0x361B},
48 	{1701916, 1702800, 0x3CE1},
49 	{1703065, 1703535, 0x4160},
50 	{1703947, 1704319, 0x44D7},
51 	{1704636, 1705076, 0x478E},
52 	{1705179, 1705881, 0x49B8},
53 	{1706261, 1720686, 0x9FA6},
54 	{1720768, 1725062, 0xE865},
55 	{1725296, 1726325, 0xFA2A},
56 	{1726612, 1726637, 0xFFE6},
57 	{1876218, 2924793, 0x10000},
58 };
59 
cbconv(struct bsdconv_instance * ins)60 void cbconv(struct bsdconv_instance *ins){
61 	struct bsdconv_phase *this_phase=THIS_PHASE(ins);
62 	struct my_s *t=THIS_CODEC(ins)->priv;
63 	unsigned char d;
64 	unsigned char *c;
65 	struct data_st data;
66 	int max=sizeof(gb18030_table) / sizeof(struct gb18030_data) - 1;
67 	int min = 0;
68 	int mid;
69 	int i;
70 	ucs_t ucs;
71 
72 	for(;this_phase->i<this_phase->curr->len;this_phase->i+=1){
73 		d=UCP(this_phase->curr->data)[this_phase->i];
74 		memcpy(&data, (char *)(this_phase->codec[this_phase->index].data_z+(uintptr_t)this_phase->state.data), sizeof(struct data_st));
75 		c=UCP(this_phase->codec[this_phase->index].data_z+de_offset(data.data));
76 		next:
77 		switch(t->status){
78 			case 0:
79 				if(t->status<data.len){
80 					t->ucs=c[0]*10;
81 					t->status=1;
82 					goto next;
83 				}
84 				t->ucs=d*10;
85 				t->status=1;
86 				break;
87 			case 1:
88 				if(t->status<data.len){
89 					t->ucs+=c[1];
90 					t->ucs*=126;
91 					t->status=2;
92 					goto next;
93 				}
94 				t->ucs+=d;
95 				t->ucs*=126;
96 				t->status=2;
97 				break;
98 			case 2:
99 				if(t->status<data.len){
100 					t->ucs+=c[2];
101 					t->ucs*=10;
102 					t->status=3;
103 					goto next;
104 				}
105 				t->ucs+=d;
106 				t->ucs*=10;
107 				t->status=3;
108 				break;
109 			case 3:
110 				if(t->status<data.len){
111 					t->ucs+=c[3];
112 					t->status=0;
113 					goto next;
114 				}
115 				t->ucs+=d;
116 				t->status=0;
117 				if (t->ucs < gb18030_table[0].beg || t->ucs > gb18030_table[max].end){
118 					DEADEND();
119 				}else while (max >= min) {
120 					mid = (min + max) / 2;
121 					if (t->ucs > gb18030_table[mid].end)
122 						min = mid + 1;
123 					else if (t->ucs < gb18030_table[mid].beg)
124 						max = mid - 1;
125 					else{
126 						break;
127 					}
128 				}
129 				if(gb18030_table[mid].beg<=t->ucs && t->ucs<=gb18030_table[mid].end){
130 					ucs.ucs4=htobe32(gb18030_table[mid].off + (t->ucs - gb18030_table[mid].beg));
131 					for(i=0;ucs.byte[i]==0 && i<4;++i);
132 					DATA_MALLOC(ins, this_phase->data_tail->next);
133 					this_phase->data_tail=this_phase->data_tail->next;
134 					this_phase->data_tail->next=NULL;
135 					this_phase->data_tail->len=5 - i;
136 					this_phase->data_tail->data=c=malloc(5 - i);
137 					this_phase->data_tail->flags=F_FREE;
138 					this_phase->state.status=NEXTPHASE;
139 					*c=0x01;
140 					c+=1;
141 					for(;i<4;++i,c+=1){
142 						*c=ucs.byte[i];
143 					}
144 					return;
145 				}else{
146 					DEADEND();
147 				}
148 				break;
149 			default:
150 				DEADEND();
151 		}
152 	}
153 	this_phase->state.status=CONTINUE;
154 	return;
155 }
156