1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2 
3 #include "ef_iso8859_conv.h"
4 
5 #include <stdio.h> /* NULL */
6 #include <pobl/bl_mem.h>
7 #include <pobl/bl_debug.h>
8 
9 #include "ef_iso2022_conv.h"
10 #include "ef_iso2022_intern.h"
11 #include "ef_viet_map.h"
12 #include "ef_ru_map.h"
13 #include "ef_ucs4_iso8859.h"
14 #include "ef_ucs4_map.h"
15 
16 /* --- static functions --- */
17 
/*
 * Try to bring 'ch' into a charset this converter can emit.
 *
 * A character whose charset is ISO10646_UCS4_1 is first offered to
 * ef_map_ucs4_to_cs() with 'gr_cs' as the target charset; if that call
 * reports success, '*ch' is overwritten with the mapped character.
 * In every other case 'ch' is handed to
 * ef_iso2022_remap_unsupported_charset().
 */
static void remap_unsupported_charset(ef_char_t *ch, ef_charset_t gr_cs) {
  ef_char_t mapped;

  if (ch->cs != ISO10646_UCS4_1 || !ef_map_ucs4_to_cs(&mapped, ch, gr_cs)) {
    /* Not UCS4, or no direct mapping to gr_cs: use the generic fallback. */
    ef_iso2022_remap_unsupported_charset(ch);

    return;
  }

  *ch = mapped;
}
31 
/*
 * Convert the characters delivered by 'parser' into single bytes in 'dst'.
 *
 * Each character is first passed through remap_unsupported_charset() with
 * the converter's G1 charset. After that:
 *  - a US_ASCII character is stored as ch.ch[0];
 *  - a character in the G1 charset is stored as SET_MSB(ch.ch[0]);
 *  - anything else is given to conv->illegal_char if that callback is set,
 *    and is skipped otherwise.
 *
 * When 'dst' has no room left (or the illegal_char callback sets its
 * 'is_full' flag), ef_parser_full_reset() is called on 'parser' and the
 * function returns early.
 * NOTE(review): presumably the reset lets the caller retry the character
 * that did not fit -- confirm against the parser implementation.
 *
 * Returns the number of bytes stored in 'dst' (at most 'dst_size' for the
 * single-byte paths).
 */
static size_t convert_to_iso8859(ef_conv_t *conv, u_char *dst, size_t dst_size,
                                 ef_parser_t *parser) {
  ef_iso2022_conv_t *state = (ef_iso2022_conv_t *)conv;
  size_t written = 0;
  ef_char_t ch;

  while (ef_parser_next_char(parser, &ch)) {
    int is_ascii;
    size_t extra;
    int is_full;

    remap_unsupported_charset(&ch, state->g1);

    is_ascii = (ch.cs == US_ASCII);

    if (is_ascii || ch.cs == state->g1) {
      /* Both cases produce exactly one output byte. */
      if (written >= dst_size) {
        ef_parser_full_reset(parser);

        return written;
      }

      if (is_ascii) {
        dst[written] = ch.ch[0];
      } else {
        dst[written] = SET_MSB(ch.ch[0]);
      }
      written++;

      continue;
    }

    if (!conv->illegal_char) {
      /* No handler installed: the character is dropped. */
      continue;
    }

    extra = (*conv->illegal_char)(conv, dst + written, dst_size - written, &is_full, &ch);
    if (is_full) {
      ef_parser_full_reset(parser);

      return written;
    }

    written += extra;
  }

  return written;
}
80 
/*
 * Put the ISO2022 state of 'conv' into its initial configuration:
 * G0 = US_ASCII, G1 = 'g1', G2 = G3 = UNKNOWN_CS, with GL pointing at G0
 * and GR pointing at G1.
 *
 * 'conv' is treated as an ef_iso2022_conv_t.
 */
static void conv_init_intern(ef_conv_t *conv, ef_charset_t g1) {
  ef_iso2022_conv_t *state = (ef_iso2022_conv_t *)conv;

  /* Designations. */
  state->g0 = US_ASCII;
  state->g1 = g1;
  state->g2 = UNKNOWN_CS;
  state->g3 = UNKNOWN_CS;

  /* Invocations. */
  state->gl = &state->g0;
  state->gr = &state->g1;
}
93 
/* Initialize 'conv' with ISO8859_1_R as its G1 charset. */
static void conv_init_iso8859_1(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_1_R);
}
95 
/* Initialize 'conv' with ISO8859_2_R as its G1 charset. */
static void conv_init_iso8859_2(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_2_R);
}
97 
/* Initialize 'conv' with ISO8859_3_R as its G1 charset. */
static void conv_init_iso8859_3(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_3_R);
}
99 
/* Initialize 'conv' with ISO8859_4_R as its G1 charset. */
static void conv_init_iso8859_4(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_4_R);
}
101 
/* Initialize 'conv' with ISO8859_5_R as its G1 charset. */
static void conv_init_iso8859_5(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_5_R);
}
103 
/* Initialize 'conv' with ISO8859_6_R as its G1 charset. */
static void conv_init_iso8859_6(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_6_R);
}
105 
/* Initialize 'conv' with ISO8859_7_R as its G1 charset. */
static void conv_init_iso8859_7(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_7_R);
}
107 
/* Initialize 'conv' with ISO8859_8_R as its G1 charset. */
static void conv_init_iso8859_8(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_8_R);
}
109 
/* Initialize 'conv' with ISO8859_9_R as its G1 charset. */
static void conv_init_iso8859_9(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_9_R);
}
111 
/* Initialize 'conv' with ISO8859_10_R as its G1 charset. */
static void conv_init_iso8859_10(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_10_R);
}
113 
/* Initialize 'conv' with TIS620_2533 as its G1 charset. */
static void conv_init_tis620_2533(ef_conv_t *conv) {
  conv_init_intern(conv, TIS620_2533);
}
115 
/* Initialize 'conv' with ISO8859_13_R as its G1 charset. */
static void conv_init_iso8859_13(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_13_R);
}
117 
/* Initialize 'conv' with ISO8859_14_R as its G1 charset. */
static void conv_init_iso8859_14(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_14_R);
}
119 
/* Initialize 'conv' with ISO8859_15_R as its G1 charset. */
static void conv_init_iso8859_15(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_15_R);
}
121 
/* Initialize 'conv' with ISO8859_16_R as its G1 charset. */
static void conv_init_iso8859_16(ef_conv_t *conv) {
  conv_init_intern(conv, ISO8859_16_R);
}
123 
/* Initialize 'conv' with TCVN5712_3_1993 as its G1 charset. */
static void conv_init_tcvn5712_3_1993(ef_conv_t *conv) {
  conv_init_intern(conv, TCVN5712_3_1993);
}
125 
/* Release the memory of 'conv' with free(). */
static void conv_destroy(ef_conv_t *conv) {
  free(conv);
}
127 
/*
 * Allocate an ef_iso2022_conv_t, run 'init' on it once and fill in its
 * ef_conv_t callbacks: convert = convert_to_iso8859, init = 'init',
 * destroy = conv_destroy, illegal_char = NULL.
 *
 * Returns the new object as an ef_conv_t pointer, or NULL if malloc() fails.
 */
static ef_conv_t *iso8859_conv_new(void (*init)(ef_conv_t *)) {
  ef_iso2022_conv_t *self = malloc(sizeof(*self));

  if (self == NULL) {
    return NULL;
  }

  (*init)((ef_conv_t *)self);

  self->conv.convert = convert_to_iso8859;
  self->conv.init = init;
  self->conv.destroy = conv_destroy;
  self->conv.illegal_char = NULL;

  return (ef_conv_t *)self;
}
144 
145 /* --- global functions --- */
146 
ef_iso8859_1_conv_new(void)147 ef_conv_t *ef_iso8859_1_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_1); }
148 
ef_iso8859_2_conv_new(void)149 ef_conv_t *ef_iso8859_2_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_2); }
150 
ef_iso8859_3_conv_new(void)151 ef_conv_t *ef_iso8859_3_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_3); }
152 
ef_iso8859_4_conv_new(void)153 ef_conv_t *ef_iso8859_4_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_4); }
154 
ef_iso8859_5_conv_new(void)155 ef_conv_t *ef_iso8859_5_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_5); }
156 
ef_iso8859_6_conv_new(void)157 ef_conv_t *ef_iso8859_6_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_6); }
158 
ef_iso8859_7_conv_new(void)159 ef_conv_t *ef_iso8859_7_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_7); }
160 
ef_iso8859_8_conv_new(void)161 ef_conv_t *ef_iso8859_8_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_8); }
162 
ef_iso8859_9_conv_new(void)163 ef_conv_t *ef_iso8859_9_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_9); }
164 
ef_iso8859_10_conv_new(void)165 ef_conv_t *ef_iso8859_10_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_10); }
166 
ef_tis620_2533_conv_new(void)167 ef_conv_t *ef_tis620_2533_conv_new(void) { return iso8859_conv_new(conv_init_tis620_2533); }
168 
ef_iso8859_13_conv_new(void)169 ef_conv_t *ef_iso8859_13_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_13); }
170 
ef_iso8859_14_conv_new(void)171 ef_conv_t *ef_iso8859_14_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_14); }
172 
ef_iso8859_15_conv_new(void)173 ef_conv_t *ef_iso8859_15_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_15); }
174 
ef_iso8859_16_conv_new(void)175 ef_conv_t *ef_iso8859_16_conv_new(void) { return iso8859_conv_new(conv_init_iso8859_16); }
176 
ef_tcvn5712_3_1993_conv_new(void)177 ef_conv_t *ef_tcvn5712_3_1993_conv_new(void) {
178   return iso8859_conv_new(conv_init_tcvn5712_3_1993);
179 }
180