1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2
3 #include "ef_8bit_conv.h"
4
5 #include <pobl/bl_mem.h>
6 #include <pobl/bl_debug.h>
7
8 #include "ef_ru_map.h"
9 #include "ef_ucs4_map.h"
10
11 #if 0
12 #define __DEBUG
13 #endif
14
15 typedef struct ef_iscii_conv {
16 ef_conv_t conv;
17 ef_charset_t cs;
18
19 } ef_iscii_conv_t;
20
21 /* --- static functions --- */
22
/*
 * Tries the conversions that are handled without any intermediate step.
 *
 *  - KOI8_U -> KOI8_R and KOI8_R -> KOI8_U are delegated to the dedicated
 *    ef_map_koi8_*() functions and their result is returned as is.
 *  - An ISO10646_UCS4_1 character whose first three bytes are zero and whose
 *    last byte is <= 0x7f becomes a one byte US_ASCII character, whatever
 *    'to_cs' is.
 *
 * dst   - receives the converted character.
 * src   - character to convert (not modified here).
 * to_cs - requested target charset.
 *
 * Returns non-zero if 'dst' was produced, 0 if none of the cases applies.
 */
static int map_direct(ef_char_t *dst, ef_char_t *src, ef_charset_t to_cs) {
  if (src->cs == KOI8_U && to_cs == KOI8_R) {
    return ef_map_koi8_u_to_koi8_r(dst, src);
  }

  if (src->cs == KOI8_R && to_cs == KOI8_U) {
    return ef_map_koi8_r_to_koi8_u(dst, src);
  }

  /* Anything that is not a 7 bit value stored as UCS4 cannot be mapped here. */
  if (src->cs != ISO10646_UCS4_1 || src->ch[0] != 0 || src->ch[1] != 0 || src->ch[2] != 0 ||
      src->ch[3] > 0x7f) {
    return 0;
  }

  dst->ch[0] = src->ch[3];
  dst->size = 1;
  dst->property = 0;
  dst->cs = US_ASCII;

  return 1;
}
40
/*
 * Rewrites '*ch' in place so that it belongs to 'to_cs' whenever a mapping
 * is available.
 *
 * A character already in 'to_cs' is left alone. Otherwise map_direct() is
 * tried first and ef_map_via_ucs() second; if both fail '*ch' keeps its
 * original contents.
 *
 * Afterwards, when the target is VISCII and the character is tagged
 * US_ASCII, the byte values 0x02, 0x05, 0x06, 0x14, 0x19 and 0x1e are
 * re-tagged as VISCII (the byte itself is unchanged).
 * NOTE(review): these look like the control code positions that VISCII
 * (RFC 1456) reuses for letters -- confirm.
 */
static void remap_unsupported_charset(ef_char_t *ch, ef_charset_t to_cs) {
  if (ch->cs != to_cs) {
    ef_char_t mapped;

    /* The second mapper is only consulted when the direct one gives up. */
    if (map_direct(&mapped, ch, to_cs) || ef_map_via_ucs(&mapped, ch, to_cs)) {
      *ch = mapped;
    }
  }

  if (to_cs != VISCII || ch->cs != US_ASCII) {
    return;
  }

  switch (ch->ch[0]) {
    case 0x02:
    case 0x05:
    case 0x06:
    case 0x14:
    case 0x19:
    case 0x1e:
      ch->cs = VISCII;
      break;

    default:
      break;
  }
}
59
/*
 * Shared conversion loop of all 8 bit converters in this file.
 *
 * Reads characters from 'parser' one at a time, remaps each of them toward
 * 'to_cs' with remap_unsupported_charset() and stores one byte (ch.ch[0])
 * per character that ends up in 'to_cs' or US_ASCII.
 *
 * conv     - converter; only its illegal_char hook is consulted here.
 * dst      - output buffer.
 * dst_size - capacity of 'dst' in bytes.
 * parser   - source of the characters.
 * to_cs    - target charset.
 *
 * Returns the number of bytes stored in 'dst'.
 *
 * A character that cannot be brought into 'to_cs' or US_ASCII is passed to
 * conv->illegal_char if that hook is set; otherwise it is skipped.
 * When the output runs out of room ef_parser_full_reset() is called before
 * returning (presumably so that the pending character is read again on the
 * next call -- confirm against the parser implementation).
 */
static size_t convert_to_intern(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser, ef_charset_t to_cs) {
  size_t filled_size;
  ef_char_t ch;

  filled_size = 0;
  while (ef_parser_next_char(parser, &ch)) {
    remap_unsupported_charset(&ch, to_cs);

    if (to_cs == ch.cs || ch.cs == US_ASCII) {
      if (filled_size >= dst_size) {
        ef_parser_full_reset(parser);

        return filled_size;
      }

      *(dst++) = ch.ch[0];

      filled_size++;
    } else if (conv->illegal_char) {
      size_t size;
      int is_full;

      /*
       * Start from "not full": a hook that never stores through the pointer
       * must not make us branch on an indeterminate value.
       */
      is_full = 0;

      size = (*conv->illegal_char)(conv, dst, dst_size - filled_size, &is_full, &ch);
      if (is_full) {
        ef_parser_full_reset(parser);

        return filled_size;
      }

      dst += size;
      filled_size += size;
    }
  }

  return filled_size;
}
97
/*
 * convert hook installed by ef_koi8_r_conv_new(): runs the shared conversion
 * loop with KOI8_R as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_koi8_r(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = KOI8_R;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
102
/*
 * convert hook installed by ef_koi8_u_conv_new(): runs the shared conversion
 * loop with KOI8_U as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_koi8_u(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = KOI8_U;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
107
/*
 * convert hook installed by ef_koi8_t_conv_new(): runs the shared conversion
 * loop with KOI8_T as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_koi8_t(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = KOI8_T;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
112
/*
 * convert hook installed by ef_georgian_ps_conv_new(): runs the shared
 * conversion loop with GEORGIAN_PS as target and returns the number of bytes
 * stored in 'dst'.
 */
static size_t convert_to_georgian_ps(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                     ef_parser_t *parser) {
  const ef_charset_t target_cs = GEORGIAN_PS;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
117
/*
 * convert hook installed by ef_cp1250_conv_new(): runs the shared conversion
 * loop with CP1250 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1250(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1250;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
122
/*
 * convert hook installed by ef_cp1251_conv_new(): runs the shared conversion
 * loop with CP1251 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1251(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1251;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
127
/*
 * convert hook installed by ef_cp1252_conv_new(): runs the shared conversion
 * loop with CP1252 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1252(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1252;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
132
/*
 * convert hook installed by ef_cp1253_conv_new(): runs the shared conversion
 * loop with CP1253 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1253(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1253;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
137
/*
 * convert hook installed by ef_cp1254_conv_new(): runs the shared conversion
 * loop with CP1254 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1254(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1254;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
142
/*
 * convert hook installed by ef_cp1255_conv_new(): runs the shared conversion
 * loop with CP1255 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1255(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1255;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
147
/*
 * convert hook installed by ef_cp1256_conv_new(): runs the shared conversion
 * loop with CP1256 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1256(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1256;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
152
/*
 * convert hook installed by ef_cp1257_conv_new(): runs the shared conversion
 * loop with CP1257 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1257(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1257;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
157
/*
 * convert hook installed by ef_cp1258_conv_new(): runs the shared conversion
 * loop with CP1258 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp1258(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = CP1258;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
162
/*
 * convert hook installed by ef_cp874_conv_new(): runs the shared conversion
 * loop with CP874 as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_cp874(ef_conv_t *conv, u_char *dst, size_t dst_size,
                               ef_parser_t *parser) {
  const ef_charset_t target_cs = CP874;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
167
/*
 * convert hook installed by ef_viscii_conv_new(): runs the shared conversion
 * loop with VISCII as target and returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_viscii(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const ef_charset_t target_cs = VISCII;

  return convert_to_intern(conv, dst, dst_size, parser, target_cs);
}
172
/*
 * convert hook installed by iscii_conv_new(). 'conv' is expected to be the
 * 'conv' member of an ef_iscii_conv_t; the charset stored next to it selects
 * the target of the shared conversion loop.
 * Returns the number of bytes stored in 'dst'.
 */
static size_t convert_to_iscii(ef_conv_t *conv, u_char *dst, size_t dst_size,
                               ef_parser_t *parser) {
  ef_iscii_conv_t *iscii_conv = (ef_iscii_conv_t*)conv;

  return convert_to_intern(conv, dst, dst_size, parser, iscii_conv->cs);
}
177
/* init hook shared by every converter in this file; it does nothing. */
static void conv_init(ef_conv_t *conv) {
  /* intentionally empty */
}
179
/*
 * destroy hook shared by every converter in this file: releases the
 * allocation that 'conv' points to.
 */
static void conv_destroy(ef_conv_t *conv) {
  free(conv);
}
181
iscii_conv_new(ef_charset_t cs)182 static ef_conv_t *iscii_conv_new(ef_charset_t cs) {
183 ef_iscii_conv_t *iscii_conv;
184
185 if ((iscii_conv = malloc(sizeof(ef_iscii_conv_t))) == NULL) {
186 return NULL;
187 }
188
189 iscii_conv->conv.convert = convert_to_iscii;
190 iscii_conv->conv.init = conv_init;
191 iscii_conv->conv.destroy = conv_destroy;
192 iscii_conv->conv.illegal_char = NULL;
193 iscii_conv->cs = cs;
194
195 return &iscii_conv->conv;
196 }
197
198 /* --- global functions --- */
199
ef_koi8_r_conv_new(void)200 ef_conv_t *ef_koi8_r_conv_new(void) {
201 ef_conv_t *conv;
202
203 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
204 return NULL;
205 }
206
207 conv->convert = convert_to_koi8_r;
208 conv->init = conv_init;
209 conv->destroy = conv_destroy;
210 conv->illegal_char = NULL;
211
212 return conv;
213 }
214
ef_koi8_u_conv_new(void)215 ef_conv_t *ef_koi8_u_conv_new(void) {
216 ef_conv_t *conv;
217
218 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
219 return NULL;
220 }
221
222 conv->convert = convert_to_koi8_u;
223 conv->init = conv_init;
224 conv->destroy = conv_destroy;
225 conv->illegal_char = NULL;
226
227 return conv;
228 }
229
ef_koi8_t_conv_new(void)230 ef_conv_t *ef_koi8_t_conv_new(void) {
231 ef_conv_t *conv;
232
233 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
234 return NULL;
235 }
236
237 conv->convert = convert_to_koi8_t;
238 conv->init = conv_init;
239 conv->destroy = conv_destroy;
240 conv->illegal_char = NULL;
241
242 return conv;
243 }
244
ef_georgian_ps_conv_new(void)245 ef_conv_t *ef_georgian_ps_conv_new(void) {
246 ef_conv_t *conv;
247
248 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
249 return NULL;
250 }
251
252 conv->convert = convert_to_georgian_ps;
253 conv->init = conv_init;
254 conv->destroy = conv_destroy;
255 conv->illegal_char = NULL;
256
257 return conv;
258 }
259
ef_cp1250_conv_new(void)260 ef_conv_t *ef_cp1250_conv_new(void) {
261 ef_conv_t *conv;
262
263 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
264 return NULL;
265 }
266
267 conv->convert = convert_to_cp1250;
268 conv->init = conv_init;
269 conv->destroy = conv_destroy;
270 conv->illegal_char = NULL;
271
272 return conv;
273 }
274
ef_cp1251_conv_new(void)275 ef_conv_t *ef_cp1251_conv_new(void) {
276 ef_conv_t *conv;
277
278 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
279 return NULL;
280 }
281
282 conv->convert = convert_to_cp1251;
283 conv->init = conv_init;
284 conv->destroy = conv_destroy;
285 conv->illegal_char = NULL;
286
287 return conv;
288 }
289
ef_cp1252_conv_new(void)290 ef_conv_t *ef_cp1252_conv_new(void) {
291 ef_conv_t *conv;
292
293 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
294 return NULL;
295 }
296
297 conv->convert = convert_to_cp1252;
298 conv->init = conv_init;
299 conv->destroy = conv_destroy;
300 conv->illegal_char = NULL;
301
302 return conv;
303 }
304
ef_cp1253_conv_new(void)305 ef_conv_t *ef_cp1253_conv_new(void) {
306 ef_conv_t *conv;
307
308 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
309 return NULL;
310 }
311
312 conv->convert = convert_to_cp1253;
313 conv->init = conv_init;
314 conv->destroy = conv_destroy;
315 conv->illegal_char = NULL;
316
317 return conv;
318 }
319
ef_cp1254_conv_new(void)320 ef_conv_t *ef_cp1254_conv_new(void) {
321 ef_conv_t *conv;
322
323 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
324 return NULL;
325 }
326
327 conv->convert = convert_to_cp1254;
328 conv->init = conv_init;
329 conv->destroy = conv_destroy;
330 conv->illegal_char = NULL;
331
332 return conv;
333 }
334
ef_cp1255_conv_new(void)335 ef_conv_t *ef_cp1255_conv_new(void) {
336 ef_conv_t *conv;
337
338 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
339 return NULL;
340 }
341
342 conv->convert = convert_to_cp1255;
343 conv->init = conv_init;
344 conv->destroy = conv_destroy;
345 conv->illegal_char = NULL;
346
347 return conv;
348 }
349
ef_cp1256_conv_new(void)350 ef_conv_t *ef_cp1256_conv_new(void) {
351 ef_conv_t *conv;
352
353 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
354 return NULL;
355 }
356
357 conv->convert = convert_to_cp1256;
358 conv->init = conv_init;
359 conv->destroy = conv_destroy;
360 conv->illegal_char = NULL;
361
362 return conv;
363 }
364
ef_cp1257_conv_new(void)365 ef_conv_t *ef_cp1257_conv_new(void) {
366 ef_conv_t *conv;
367
368 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
369 return NULL;
370 }
371
372 conv->convert = convert_to_cp1257;
373 conv->init = conv_init;
374 conv->destroy = conv_destroy;
375 conv->illegal_char = NULL;
376
377 return conv;
378 }
379
ef_cp1258_conv_new(void)380 ef_conv_t *ef_cp1258_conv_new(void) {
381 ef_conv_t *conv;
382
383 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
384 return NULL;
385 }
386
387 conv->convert = convert_to_cp1258;
388 conv->init = conv_init;
389 conv->destroy = conv_destroy;
390 conv->illegal_char = NULL;
391
392 return conv;
393 }
394
ef_cp874_conv_new(void)395 ef_conv_t *ef_cp874_conv_new(void) {
396 ef_conv_t *conv;
397
398 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
399 return NULL;
400 }
401
402 conv->convert = convert_to_cp874;
403 conv->init = conv_init;
404 conv->destroy = conv_destroy;
405 conv->illegal_char = NULL;
406
407 return conv;
408 }
409
ef_viscii_conv_new(void)410 ef_conv_t *ef_viscii_conv_new(void) {
411 ef_conv_t *conv;
412
413 if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
414 return NULL;
415 }
416
417 conv->convert = convert_to_viscii;
418 conv->init = conv_init;
419 conv->destroy = conv_destroy;
420 conv->illegal_char = NULL;
421
422 return conv;
423 }
424
ef_iscii_assamese_conv_new(void)425 ef_conv_t *ef_iscii_assamese_conv_new(void) { return iscii_conv_new(ISCII_ASSAMESE); }
426
ef_iscii_bengali_conv_new(void)427 ef_conv_t *ef_iscii_bengali_conv_new(void) { return iscii_conv_new(ISCII_BENGALI); }
428
ef_iscii_gujarati_conv_new(void)429 ef_conv_t *ef_iscii_gujarati_conv_new(void) { return iscii_conv_new(ISCII_GUJARATI); }
430
ef_iscii_hindi_conv_new(void)431 ef_conv_t *ef_iscii_hindi_conv_new(void) { return iscii_conv_new(ISCII_HINDI); }
432
ef_iscii_kannada_conv_new(void)433 ef_conv_t *ef_iscii_kannada_conv_new(void) { return iscii_conv_new(ISCII_KANNADA); }
434
ef_iscii_malayalam_conv_new(void)435 ef_conv_t *ef_iscii_malayalam_conv_new(void) { return iscii_conv_new(ISCII_MALAYALAM); }
436
ef_iscii_oriya_conv_new(void)437 ef_conv_t *ef_iscii_oriya_conv_new(void) { return iscii_conv_new(ISCII_ORIYA); }
438
ef_iscii_punjabi_conv_new(void)439 ef_conv_t *ef_iscii_punjabi_conv_new(void) { return iscii_conv_new(ISCII_PUNJABI); }
440
ef_iscii_tamil_conv_new(void)441 ef_conv_t *ef_iscii_tamil_conv_new(void) { return iscii_conv_new(ISCII_TAMIL); }
442
ef_iscii_telugu_conv_new(void)443 ef_conv_t *ef_iscii_telugu_conv_new(void) { return iscii_conv_new(ISCII_TELUGU); }
444