1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2 
3 #include "ef_8bit_conv.h"
4 
5 #include <pobl/bl_mem.h>
6 #include <pobl/bl_debug.h>
7 
8 #include "ef_ru_map.h"
9 #include "ef_ucs4_map.h"
10 
11 #if 0
12 #define __DEBUG
13 #endif
14 
15 typedef struct ef_iscii_conv {
16   ef_conv_t conv;
17   ef_charset_t cs;
18 
19 } ef_iscii_conv_t;
20 
21 /* --- static functions --- */
22 
/*
 * Attempts a conversion of src that does not need the generic UCS route.
 *
 * Handled cases:
 *  - KOI8_U -> KOI8_R and KOI8_R -> KOI8_U, delegated to the ef_ru_map helpers
 *    (their return value is passed through unchanged).
 *  - ISO10646_UCS4_1 characters whose four bytes encode a value <= 0x7f are
 *    rewritten as a 1-byte US_ASCII character, whatever to_cs is.
 *
 * dst is only filled in on success. Returns 0 when none of the cases applies.
 */
static int map_direct(ef_char_t *dst, ef_char_t *src, ef_charset_t to_cs) {
  if (src->cs == KOI8_U && to_cs == KOI8_R) {
    return ef_map_koi8_u_to_koi8_r(dst, src);
  }

  if (src->cs == KOI8_R && to_cs == KOI8_U) {
    return ef_map_koi8_r_to_koi8_u(dst, src);
  }

  /* Anything that is not a UCS4 value in the 7-bit range is left to the caller. */
  if (src->cs != ISO10646_UCS4_1 || src->ch[0] != 0 || src->ch[1] != 0 || src->ch[2] != 0 ||
      src->ch[3] > 0x7f) {
    return 0;
  }

  /* Only the lowest UCS4 byte (ch[3]) is significant here; it becomes the single ASCII byte. */
  dst->cs = US_ASCII;
  dst->size = 1;
  dst->property = 0;
  dst->ch[0] = src->ch[3];

  return 1;
}
40 
/*
 * Rewrites *ch in place so that, where possible, it belongs to to_cs.
 *
 * A character already in to_cs is left alone. Otherwise map_direct() is tried
 * first and ef_map_via_ucs() second; if both fail *ch keeps its original
 * charset and the caller has to deal with it.
 *
 * Afterwards, when the target is VISCII, six specific US_ASCII byte values
 * are retagged as VISCII (the byte itself is not changed).
 */
static void remap_unsupported_charset(ef_char_t *ch, ef_charset_t to_cs) {
  ef_char_t mapped;

  if (ch->cs != to_cs &&
      (map_direct(&mapped, ch, to_cs) || ef_map_via_ucs(&mapped, ch, to_cs))) {
    *ch = mapped;
  }

  if (to_cs != VISCII || ch->cs != US_ASCII) {
    return;
  }

  /*
   * NOTE(review): these values lie in the ASCII control range; presumably VISCII
   * assigns letters to exactly these positions (see RFC 1456) - confirm.
   */
  switch (ch->ch[0]) {
    case 0x02:
    case 0x05:
    case 0x06:
    case 0x14:
    case 0x19:
    case 0x1e:
      ch->cs = VISCII;
      break;

    default:
      break;
  }
}
59 
/*
 * Conversion loop shared by every converter in this file.
 *
 * Reads characters from parser one at a time, lets
 * remap_unsupported_charset() try to bring each into to_cs, and emits one
 * byte (ch.ch[0]) per character that ends up in to_cs or US_ASCII.
 *
 * Characters that could not be mapped are handed to conv->illegal_char when
 * that callback is set; when it is NULL they are dropped without output.
 *
 *   conv      converter owning the optional illegal_char callback
 *   dst       output buffer
 *   dst_size  capacity of dst in bytes
 *   parser    character source
 *   to_cs     target charset
 *
 * Returns the number of bytes stored in dst. When dst runs out of room the
 * parser is reset with ef_parser_full_reset() before returning (presumably so
 * that the character which did not fit is delivered again on the next call -
 * confirm against the ef_parser implementation).
 */
static size_t convert_to_intern(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser, ef_charset_t to_cs) {
  size_t filled_size;
  ef_char_t ch;

  filled_size = 0;
  while (ef_parser_next_char(parser, &ch)) {
    remap_unsupported_charset(&ch, to_cs);

    if (to_cs == ch.cs || ch.cs == US_ASCII) {
      if (filled_size >= dst_size) {
        ef_parser_full_reset(parser);

        return filled_size;
      }

      *(dst++) = ch.ch[0];

      filled_size++;
    } else if (conv->illegal_char) {
      size_t size;
      /*
       * Start from "not full": the callback reports through this out-parameter,
       * and a callback that never stores to it must not leave the test below
       * reading an indeterminate value.
       */
      int is_full = 0;

      size = (*conv->illegal_char)(conv, dst, dst_size - filled_size, &is_full, &ch);
      if (is_full) {
        ef_parser_full_reset(parser);

        return filled_size;
      }

      dst += size;
      filled_size += size;
    }
  }

  return filled_size;
}
97 
/* convert callback for KOI8_R: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_koi8_r(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, KOI8_R);

  return written;
}
102 
/* convert callback for KOI8_U: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_koi8_u(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, KOI8_U);

  return written;
}
107 
/* convert callback for KOI8_T: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_koi8_t(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, KOI8_T);

  return written;
}
112 
/* convert callback for GEORGIAN_PS: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_georgian_ps(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                     ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, GEORGIAN_PS);

  return written;
}
117 
/* convert callback for CP1250: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1250(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1250);

  return written;
}
122 
/* convert callback for CP1251: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1251(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1251);

  return written;
}
127 
/* convert callback for CP1252: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1252(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1252);

  return written;
}
132 
/* convert callback for CP1253: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1253(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1253);

  return written;
}
137 
/* convert callback for CP1254: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1254(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1254);

  return written;
}
142 
/* convert callback for CP1255: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1255(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1255);

  return written;
}
147 
/* convert callback for CP1256: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1256(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1256);

  return written;
}
152 
/* convert callback for CP1257: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1257(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1257);

  return written;
}
157 
/* convert callback for CP1258: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp1258(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP1258);

  return written;
}
162 
/* convert callback for CP874: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_cp874(ef_conv_t *conv, u_char *dst, size_t dst_size,
                               ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, CP874);

  return written;
}
167 
/* convert callback for VISCII: forwards to convert_to_intern() and returns the bytes written. */
static size_t convert_to_viscii(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                ef_parser_t *parser) {
  const size_t written = convert_to_intern(conv, dst, dst_size, parser, VISCII);

  return written;
}
172 
/*
 * convert callback shared by all ISCII converters. conv is expected to be the
 * conv member of an ef_iscii_conv_t (as set up by iscii_conv_new()); the
 * target charset is read from that enclosing object.
 */
static size_t convert_to_iscii(ef_conv_t *conv, u_char *dst, size_t dst_size,
                               ef_parser_t *parser) {
  ef_iscii_conv_t *iscii_conv = (ef_iscii_conv_t *)conv;

  return convert_to_intern(conv, dst, dst_size, parser, iscii_conv->cs);
}
177 
/* init callback installed on every converter created in this file; it does nothing. */
static void conv_init(ef_conv_t *conv) {
  (void)conv;
}
179 
/*
 * destroy callback: releases the converter's allocation. This also covers
 * ef_iscii_conv_t, whose ef_conv_t is its first member, so conv is the same
 * address that malloc() returned.
 */
static void conv_destroy(ef_conv_t *conv) {
  free(conv);
}
181 
iscii_conv_new(ef_charset_t cs)182 static ef_conv_t *iscii_conv_new(ef_charset_t cs) {
183   ef_iscii_conv_t *iscii_conv;
184 
185   if ((iscii_conv = malloc(sizeof(ef_iscii_conv_t))) == NULL) {
186     return NULL;
187   }
188 
189   iscii_conv->conv.convert = convert_to_iscii;
190   iscii_conv->conv.init = conv_init;
191   iscii_conv->conv.destroy = conv_destroy;
192   iscii_conv->conv.illegal_char = NULL;
193   iscii_conv->cs = cs;
194 
195   return &iscii_conv->conv;
196 }
197 
198 /* --- global functions --- */
199 
ef_koi8_r_conv_new(void)200 ef_conv_t *ef_koi8_r_conv_new(void) {
201   ef_conv_t *conv;
202 
203   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
204     return NULL;
205   }
206 
207   conv->convert = convert_to_koi8_r;
208   conv->init = conv_init;
209   conv->destroy = conv_destroy;
210   conv->illegal_char = NULL;
211 
212   return conv;
213 }
214 
ef_koi8_u_conv_new(void)215 ef_conv_t *ef_koi8_u_conv_new(void) {
216   ef_conv_t *conv;
217 
218   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
219     return NULL;
220   }
221 
222   conv->convert = convert_to_koi8_u;
223   conv->init = conv_init;
224   conv->destroy = conv_destroy;
225   conv->illegal_char = NULL;
226 
227   return conv;
228 }
229 
ef_koi8_t_conv_new(void)230 ef_conv_t *ef_koi8_t_conv_new(void) {
231   ef_conv_t *conv;
232 
233   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
234     return NULL;
235   }
236 
237   conv->convert = convert_to_koi8_t;
238   conv->init = conv_init;
239   conv->destroy = conv_destroy;
240   conv->illegal_char = NULL;
241 
242   return conv;
243 }
244 
ef_georgian_ps_conv_new(void)245 ef_conv_t *ef_georgian_ps_conv_new(void) {
246   ef_conv_t *conv;
247 
248   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
249     return NULL;
250   }
251 
252   conv->convert = convert_to_georgian_ps;
253   conv->init = conv_init;
254   conv->destroy = conv_destroy;
255   conv->illegal_char = NULL;
256 
257   return conv;
258 }
259 
ef_cp1250_conv_new(void)260 ef_conv_t *ef_cp1250_conv_new(void) {
261   ef_conv_t *conv;
262 
263   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
264     return NULL;
265   }
266 
267   conv->convert = convert_to_cp1250;
268   conv->init = conv_init;
269   conv->destroy = conv_destroy;
270   conv->illegal_char = NULL;
271 
272   return conv;
273 }
274 
ef_cp1251_conv_new(void)275 ef_conv_t *ef_cp1251_conv_new(void) {
276   ef_conv_t *conv;
277 
278   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
279     return NULL;
280   }
281 
282   conv->convert = convert_to_cp1251;
283   conv->init = conv_init;
284   conv->destroy = conv_destroy;
285   conv->illegal_char = NULL;
286 
287   return conv;
288 }
289 
ef_cp1252_conv_new(void)290 ef_conv_t *ef_cp1252_conv_new(void) {
291   ef_conv_t *conv;
292 
293   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
294     return NULL;
295   }
296 
297   conv->convert = convert_to_cp1252;
298   conv->init = conv_init;
299   conv->destroy = conv_destroy;
300   conv->illegal_char = NULL;
301 
302   return conv;
303 }
304 
ef_cp1253_conv_new(void)305 ef_conv_t *ef_cp1253_conv_new(void) {
306   ef_conv_t *conv;
307 
308   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
309     return NULL;
310   }
311 
312   conv->convert = convert_to_cp1253;
313   conv->init = conv_init;
314   conv->destroy = conv_destroy;
315   conv->illegal_char = NULL;
316 
317   return conv;
318 }
319 
ef_cp1254_conv_new(void)320 ef_conv_t *ef_cp1254_conv_new(void) {
321   ef_conv_t *conv;
322 
323   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
324     return NULL;
325   }
326 
327   conv->convert = convert_to_cp1254;
328   conv->init = conv_init;
329   conv->destroy = conv_destroy;
330   conv->illegal_char = NULL;
331 
332   return conv;
333 }
334 
ef_cp1255_conv_new(void)335 ef_conv_t *ef_cp1255_conv_new(void) {
336   ef_conv_t *conv;
337 
338   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
339     return NULL;
340   }
341 
342   conv->convert = convert_to_cp1255;
343   conv->init = conv_init;
344   conv->destroy = conv_destroy;
345   conv->illegal_char = NULL;
346 
347   return conv;
348 }
349 
ef_cp1256_conv_new(void)350 ef_conv_t *ef_cp1256_conv_new(void) {
351   ef_conv_t *conv;
352 
353   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
354     return NULL;
355   }
356 
357   conv->convert = convert_to_cp1256;
358   conv->init = conv_init;
359   conv->destroy = conv_destroy;
360   conv->illegal_char = NULL;
361 
362   return conv;
363 }
364 
ef_cp1257_conv_new(void)365 ef_conv_t *ef_cp1257_conv_new(void) {
366   ef_conv_t *conv;
367 
368   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
369     return NULL;
370   }
371 
372   conv->convert = convert_to_cp1257;
373   conv->init = conv_init;
374   conv->destroy = conv_destroy;
375   conv->illegal_char = NULL;
376 
377   return conv;
378 }
379 
ef_cp1258_conv_new(void)380 ef_conv_t *ef_cp1258_conv_new(void) {
381   ef_conv_t *conv;
382 
383   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
384     return NULL;
385   }
386 
387   conv->convert = convert_to_cp1258;
388   conv->init = conv_init;
389   conv->destroy = conv_destroy;
390   conv->illegal_char = NULL;
391 
392   return conv;
393 }
394 
ef_cp874_conv_new(void)395 ef_conv_t *ef_cp874_conv_new(void) {
396   ef_conv_t *conv;
397 
398   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
399     return NULL;
400   }
401 
402   conv->convert = convert_to_cp874;
403   conv->init = conv_init;
404   conv->destroy = conv_destroy;
405   conv->illegal_char = NULL;
406 
407   return conv;
408 }
409 
ef_viscii_conv_new(void)410 ef_conv_t *ef_viscii_conv_new(void) {
411   ef_conv_t *conv;
412 
413   if ((conv = malloc(sizeof(ef_conv_t))) == NULL) {
414     return NULL;
415   }
416 
417   conv->convert = convert_to_viscii;
418   conv->init = conv_init;
419   conv->destroy = conv_destroy;
420   conv->illegal_char = NULL;
421 
422   return conv;
423 }
424 
ef_iscii_assamese_conv_new(void)425 ef_conv_t *ef_iscii_assamese_conv_new(void) { return iscii_conv_new(ISCII_ASSAMESE); }
426 
ef_iscii_bengali_conv_new(void)427 ef_conv_t *ef_iscii_bengali_conv_new(void) { return iscii_conv_new(ISCII_BENGALI); }
428 
ef_iscii_gujarati_conv_new(void)429 ef_conv_t *ef_iscii_gujarati_conv_new(void) { return iscii_conv_new(ISCII_GUJARATI); }
430 
ef_iscii_hindi_conv_new(void)431 ef_conv_t *ef_iscii_hindi_conv_new(void) { return iscii_conv_new(ISCII_HINDI); }
432 
ef_iscii_kannada_conv_new(void)433 ef_conv_t *ef_iscii_kannada_conv_new(void) { return iscii_conv_new(ISCII_KANNADA); }
434 
ef_iscii_malayalam_conv_new(void)435 ef_conv_t *ef_iscii_malayalam_conv_new(void) { return iscii_conv_new(ISCII_MALAYALAM); }
436 
ef_iscii_oriya_conv_new(void)437 ef_conv_t *ef_iscii_oriya_conv_new(void) { return iscii_conv_new(ISCII_ORIYA); }
438 
ef_iscii_punjabi_conv_new(void)439 ef_conv_t *ef_iscii_punjabi_conv_new(void) { return iscii_conv_new(ISCII_PUNJABI); }
440 
ef_iscii_tamil_conv_new(void)441 ef_conv_t *ef_iscii_tamil_conv_new(void) { return iscii_conv_new(ISCII_TAMIL); }
442 
ef_iscii_telugu_conv_new(void)443 ef_conv_t *ef_iscii_telugu_conv_new(void) { return iscii_conv_new(ISCII_TELUGU); }
444