1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2
3 #include "ef_euccn_conv.h"
4
5 #include <pobl/bl_mem.h>
6 #include <pobl/bl_debug.h>
7
8 #include "ef_iso2022_conv.h"
9 #include "ef_iso2022_intern.h"
10 #include "ef_ucs4_map.h"
11 #include "ef_zh_cn_map.h"
12 #include "ef_gb18030_2000_intern.h"
13
/*
 * Output flavour requested from convert_to_euccn_intern().
 *
 *  EUCCN_NORMAL       - GB2312_80 characters are written as two bytes passed
 *                       through MAP_TO_GR().
 *  EUCCN_GBK          - GBK characters are written as two bytes, unmodified.
 *  EUCCN_GB18030_2000 - as EUCCN_GBK, and ISO10646_UCS4_1 characters are
 *                       additionally written as four-byte sequences.
 */
enum euccn_encoding {
  EUCCN_NORMAL = 0,
  EUCCN_GBK,
  EUCCN_GB18030_2000
};

typedef enum euccn_encoding enccn_encoding_t;
20
21 /* --- static functions --- */
22
/*
 * Rewrite *ch in place so that its charset is one the requested encoding
 * can emit.
 *
 *  1. An ISO10646_UCS4_1 character is replaced by its zh_CN mapping when
 *     ef_map_ucs4_to_zh_cn() succeeds.
 *  2. For EUCCN_NORMAL the character is then handed to
 *     ef_iso2022_remap_unsupported_charset().
 *  3. For the other encodings a GB2312_80 character is replaced by its GBK
 *     mapping when ef_map_gb2312_80_to_gbk() succeeds; every other charset
 *     (including a still-unmapped ISO10646_UCS4_1) is left untouched.
 */
static void remap_unsupported_charset(ef_char_t *ch, enccn_encoding_t encoding) {
  ef_char_t mapped;

  if (ch->cs == ISO10646_UCS4_1 && ef_map_ucs4_to_zh_cn(&mapped, ch)) {
    *ch = mapped;
  }

  if (encoding == EUCCN_NORMAL) {
    ef_iso2022_remap_unsupported_charset(ch);
    return;
  }

  /* GBK / GB18030-2000: only GB2312_80 needs promoting to GBK here. */
  if (ch->cs == GB2312_80 && ef_map_gb2312_80_to_gbk(&mapped, ch)) {
    *ch = mapped;
  }
}
46
/*
 * Shared conversion loop for the EUC-CN, GBK and GB18030-2000 converters.
 *
 * Pulls characters from parser with ef_parser_next_char(), remaps each one
 * with remap_unsupported_charset() and appends its byte sequence to dst:
 *
 *  - US_ASCII                                  : 1 byte
 *  - GB2312_80 (EUCCN_NORMAL only)             : 2 bytes via MAP_TO_GR()
 *  - GBK (EUCCN_GBK / EUCCN_GB18030_2000)      : 2 bytes, unmodified
 *  - ISO10646_UCS4_1 (EUCCN_GB18030_2000 only) : 4 bytes from
 *    ef_encode_ucs4_to_gb18030_2000(); the character is skipped when that
 *    function returns 0
 *  - anything else                             : passed to conv->illegal_char
 *    if that hook is set, otherwise skipped
 *
 * When the next sequence does not fit into the remaining space (or the
 * illegal_char hook reports is_full), ef_parser_full_reset() is called on
 * parser and conversion stops.
 *
 * Returns the number of bytes written to dst.
 */
static size_t convert_to_euccn_intern(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                      ef_parser_t *parser, enccn_encoding_t encoding) {
  size_t written = 0;
  ef_char_t ch;

  while (ef_parser_next_char(parser, &ch)) {
    remap_unsupported_charset(&ch, encoding);

    if (ch.cs == US_ASCII) {
      if (written >= dst_size) {
        goto buffer_full;
      }

      dst[0] = ch.ch[0];
      dst += 1;
      written += 1;
    } else if (ch.cs == GB2312_80 && encoding == EUCCN_NORMAL) {
      if (written + 1 >= dst_size) {
        goto buffer_full;
      }

      dst[0] = MAP_TO_GR(ch.ch[0]);
      dst[1] = MAP_TO_GR(ch.ch[1]);
      dst += 2;
      written += 2;
    } else if (ch.cs == GBK && (encoding == EUCCN_GBK || encoding == EUCCN_GB18030_2000)) {
      if (written + 1 >= dst_size) {
        goto buffer_full;
      }

      dst[0] = ch.ch[0];
      dst[1] = ch.ch[1];
      dst += 2;
      written += 2;
    } else if (ch.cs == ISO10646_UCS4_1 && encoding == EUCCN_GB18030_2000) {
      u_char seq[4];

      /* The space check comes before the encode attempt, as a full buffer
       * ends the conversion even if the character would have been skipped. */
      if (written + 3 >= dst_size) {
        goto buffer_full;
      }

      if (ef_encode_ucs4_to_gb18030_2000(seq, ch.ch) != 0) {
        size_t i;

        for (i = 0; i < 4; i++) {
          dst[i] = seq[i];
        }
        dst += 4;
        written += 4;
      }
    } else if (conv->illegal_char) {
      int is_full;
      size_t produced;

      produced = conv->illegal_char(conv, dst, dst_size - written, &is_full, &ch);
      if (is_full) {
        goto buffer_full;
      }

      dst += produced;
      written += produced;
    }
  }

  return written;

buffer_full:
  ef_parser_full_reset(parser);

  return written;
}
125
/*
 * convert hook installed by ef_euccn_conv_new(): runs the shared conversion
 * loop with EUCCN_NORMAL and returns the number of bytes written to dst.
 */
static size_t convert_to_euccn(ef_conv_t *conv, u_char *dst, size_t dst_size,
                               ef_parser_t *parser) {
  const enccn_encoding_t encoding = EUCCN_NORMAL;

  return convert_to_euccn_intern(conv, dst, dst_size, parser, encoding);
}
130
/*
 * convert hook installed by ef_gbk_conv_new(): runs the shared conversion
 * loop with EUCCN_GBK and returns the number of bytes written to dst.
 */
static size_t convert_to_gbk(ef_conv_t *conv, u_char *dst, size_t dst_size,
                             ef_parser_t *parser) {
  const enccn_encoding_t encoding = EUCCN_GBK;

  return convert_to_euccn_intern(conv, dst, dst_size, parser, encoding);
}
134
/*
 * convert hook installed by ef_gb18030_2000_conv_new(): runs the shared
 * conversion loop with EUCCN_GB18030_2000 and returns the number of bytes
 * written to dst.
 */
static size_t convert_to_gb18030_2000(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                      ef_parser_t *parser) {
  const enccn_encoding_t encoding = EUCCN_GB18030_2000;

  return convert_to_euccn_intern(conv, dst, dst_size, parser, encoding);
}
139
/*
 * Put the ISO 2022 designation state of an EUC-CN converter into its initial
 * configuration: G0 = US_ASCII, G1 = GB2312_80, G2/G3 = UNKNOWN_CS, with GL
 * pointing at G0 and GR pointing at G1.
 *
 * conv is reinterpreted as an ef_iso2022_conv_t, so it must have been
 * allocated as one (as ef_euccn_conv_new() does).
 */
static void euccn_conv_init(ef_conv_t *conv) {
  ef_iso2022_conv_t *state = (ef_iso2022_conv_t *)conv;

  state->g0 = US_ASCII;
  state->g1 = GB2312_80;
  state->g2 = UNKNOWN_CS;
  state->g3 = UNKNOWN_CS;

  state->gl = &state->g0;
  state->gr = &state->g1;
}
152
/*
 * init hook for the GBK and GB18030-2000 converters. It intentionally does
 * nothing.
 */
static void conv_init(ef_conv_t *conv) {
  (void)conv;
}
154
/*
 * destroy hook shared by all converters created in this file: releases the
 * converter object with free().
 */
static void conv_destroy(ef_conv_t *conv) {
  free(conv);
}
156
157 /* --- global functions --- */
158
ef_euccn_conv_new(void)159 ef_conv_t *ef_euccn_conv_new(void) {
160 ef_iso2022_conv_t *iso2022_conv;
161
162 if ((iso2022_conv = malloc(sizeof(ef_iso2022_conv_t))) == NULL) {
163 return NULL;
164 }
165
166 euccn_conv_init((ef_conv_t*)iso2022_conv);
167
168 iso2022_conv->conv.convert = convert_to_euccn;
169 iso2022_conv->conv.init = euccn_conv_init;
170 iso2022_conv->conv.destroy = conv_destroy;
171 iso2022_conv->conv.illegal_char = NULL;
172
173 return (ef_conv_t*)iso2022_conv;
174 }
175
ef_gbk_conv_new(void)176 ef_conv_t *ef_gbk_conv_new(void) {
177 ef_conv_t *conv;
178
179 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
180 return NULL;
181 }
182
183 conv->convert = convert_to_gbk;
184 conv->init = conv_init;
185 conv->destroy = conv_destroy;
186 conv->illegal_char = NULL;
187
188 return conv;
189 }
190
ef_gb18030_2000_conv_new(void)191 ef_conv_t *ef_gb18030_2000_conv_new(void) {
192 ef_conv_t *conv;
193
194 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
195 return NULL;
196 }
197
198 conv->convert = convert_to_gb18030_2000;
199 conv->init = conv_init;
200 conv->destroy = conv_destroy;
201 conv->illegal_char = NULL;
202
203 return conv;
204 }
205