1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
4 */
5 /**
6 * \file
7 * \brief Implements simple ICONV
8 *
9 * This implements an interface similar to that of iconv and
10 * is used by YAZ to interface with iconv (if present).
11 * For systems where iconv is not present, this layer
12 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
13 *
14 */
15
16 #if HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <string.h>
23
24 #if HAVE_ICONV_H
25 #include <iconv.h>
26 #endif
27
28 #include <yaz/xmalloc.h>
29 #include <yaz/errno.h>
30 #include "iconv-p.h"
31
/** \brief Conversion handle state shared by the functions in this file */
struct yaz_iconv_struct {
    /* last error code; set via yaz_iconv_set_errno(), read via
       yaz_iconv_error() */
    int my_errno;
    /* non-zero when the next yaz_iconv() call must run the encoder and
       decoder init handlers before converting */
    int init_flag;
    /* number of input bytes that were consumed to produce unget_x */
    size_t no_read_x;
    /* character that was read but could not be written (output buffer
       full); retried on the next yaz_iconv() call. 0 means none pending */
    unsigned long unget_x;
#if HAVE_ICONV_H
    /* system iconv descriptor, or (iconv_t) (-1) when it is not in use */
    iconv_t iconv_cd;
#endif
    /* handler table + private data for the output side */
    struct yaz_iconv_encoder_s encoder;
    /* handler table + private data for the input side */
    struct yaz_iconv_decoder_s decoder;
};
43
44
/**
 * \brief Tells whether a handle has both a read and a write handler set
 * \param cd conversion handle
 * \retval 1 both decoder.read_handle and encoder.write_handle are set
 * \retval 0 at least one of the two handlers is missing
 */
int yaz_iconv_isbuiltin(yaz_iconv_t cd)
{
    if (!cd->decoder.read_handle)
        return 0;
    return cd->encoder.write_handle ? 1 : 0;
}
49
50
/**
 * \brief Tries each encoder setup routine in turn for \a tocode
 * \param cd conversion handle whose encoder slot is offered to each routine
 * \param tocode name of the target character set
 * \retval 1 one of the setup routines returned non-zero
 * \retval 0 none of them did
 *
 * The routines are tried in a fixed order and the first non-zero result
 * stops the search (short-circuit evaluation).
 */
static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
{
    struct yaz_iconv_encoder_s *enc = &cd->encoder;

    return yaz_marc8_encoder(tocode, enc)
        || yaz_utf8_encoder(tocode, enc)
        || yaz_ucs4_encoder(tocode, enc)
        || yaz_iso_8859_1_encoder(tocode, enc)
        || yaz_iso_5428_encoder(tocode, enc)
        || yaz_advancegreek_encoder(tocode, enc)
        || yaz_wchar_encoder(tocode, enc)
        || yaz_danmarc_encoder(tocode, enc);
}
71
/**
 * \brief Tries each decoder setup routine in turn for \a fromcode
 * \param cd conversion handle whose decoder slot is offered to each routine
 * \param fromcode name of the source character set
 * \retval 1 one of the setup routines returned non-zero
 * \retval 0 none of them did
 *
 * The routines are tried in a fixed order and the first non-zero result
 * stops the search (short-circuit evaluation).
 */
static int prepare_decoders(yaz_iconv_t cd, const char *fromcode)
{
    struct yaz_iconv_decoder_s *dec = &cd->decoder;

    return yaz_marc8_decoder(fromcode, dec)
        || yaz_iso5426_decoder(fromcode, dec)
        || yaz_utf8_decoder(fromcode, dec)
        || yaz_ucs4_decoder(fromcode, dec)
        || yaz_iso_8859_1_decoder(fromcode, dec)
        || yaz_iso_5428_decoder(fromcode, dec)
        || yaz_advancegreek_decoder(fromcode, dec)
        || yaz_wchar_decoder(fromcode, dec)
        || yaz_danmarc_decoder(fromcode, dec);
}
94
yaz_iconv_open(const char * tocode,const char * fromcode)95 yaz_iconv_t yaz_iconv_open(const char *tocode, const char *fromcode)
96 {
97 yaz_iconv_t cd = (yaz_iconv_t) xmalloc(sizeof(*cd));
98
99 cd->encoder.data = 0;
100 cd->encoder.write_handle = 0;
101 cd->encoder.flush_handle = 0;
102 cd->encoder.init_handle = 0;
103 cd->encoder.destroy_handle = 0;
104
105 cd->decoder.data = 0;
106 cd->decoder.read_handle = 0;
107 cd->decoder.init_handle = 0;
108 cd->decoder.destroy_handle = 0;
109
110 cd->my_errno = YAZ_ICONV_UNKNOWN;
111
112 /* a useful hack: if fromcode has leading @,
113 the library not use YAZ's own conversions .. */
114 if (fromcode[0] == '@')
115 fromcode++;
116 else
117 {
118 prepare_encoders(cd, tocode);
119 prepare_decoders(cd, fromcode);
120 }
121 if (cd->decoder.read_handle && cd->encoder.write_handle)
122 {
123 #if HAVE_ICONV_H
124 cd->iconv_cd = (iconv_t) (-1);
125 #endif
126 ;
127 }
128 else
129 {
130 #if HAVE_ICONV_H
131 cd->iconv_cd = iconv_open(tocode, fromcode);
132 if (cd->iconv_cd == (iconv_t) (-1))
133 {
134 yaz_iconv_close(cd);
135 return 0;
136 }
137 #else
138 yaz_iconv_close(cd);
139 return 0;
140 #endif
141 }
142 cd->init_flag = 1;
143 return cd;
144 }
145
/**
 * \brief Converts a chunk of input, or flushes/resets the handle
 * \param cd conversion handle
 * \param inbuf address of input pointer; advanced past consumed bytes.
 *        Pass 0 (or a pointer to 0) to flush pending output and reset
 * \param inbytesleft address of remaining input byte count; decremented
 * \param outbuf address of output pointer; handed to the write and
 *        flush handlers
 * \param outbytesleft address of remaining output space; handed to the
 *        write and flush handlers
 * \returns
 *  - when the handle wraps system iconv: whatever iconv() returns, with
 *    errno translated into the handle's error code on failure;
 *  - otherwise, once all input is consumed: the number of input bytes
 *    consumed by this call;
 *  - (size_t)(-1) when the read handler consumed no bytes;
 *  - the non-zero result of the init, write or flush handler otherwise.
 *
 * Use yaz_iconv_error() to fetch the error code after a failure.
 */
size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
    char *start = 0;
    size_t result = 0;

#if HAVE_ICONV_H
    if (cd->iconv_cd != (iconv_t) (-1))
    {
        /* system iconv does the work; only the errno needs mapping */
        size_t sys_rc =
            iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
        if (sys_rc == (size_t)(-1))
        {
            int err = yaz_errno();

            if (err == E2BIG)
                cd->my_errno = YAZ_ICONV_E2BIG;
            else if (err == EINVAL)
                cd->my_errno = YAZ_ICONV_EINVAL;
            else if (err == EILSEQ)
                cd->my_errno = YAZ_ICONV_EILSEQ;
            else
                cd->my_errno = YAZ_ICONV_UNKNOWN;
        }
        return sys_rc;
    }
#endif

    /* remember where the input started so the consumed count can be
       reported at the end */
    if (inbuf)
        start = *inbuf;

    if (cd->init_flag)
    {
        cd->my_errno = YAZ_ICONV_UNKNOWN;

        if (cd->encoder.init_handle)
            (*cd->encoder.init_handle)(&cd->encoder);

        /* nothing pending from an earlier call */
        cd->no_read_x = 0;
        cd->unget_x = 0;

        if (cd->decoder.init_handle)
        {
            size_t skipped = 0;
            unsigned char *in_ptr = inbuf ? (unsigned char *) *inbuf : 0;
            size_t in_len = inbytesleft ? *inbytesleft : 0;
            size_t init_rc = (cd->decoder.init_handle)(
                cd, &cd->decoder, in_ptr, in_len, &skipped);

            if (init_rc)
            {
                /* on EINVAL init_flag stays set, so the init handlers
                   run again on the next call */
                if (cd->my_errno != YAZ_ICONV_EINVAL)
                    cd->init_flag = 0;
                return init_rc;
            }
            /* skip whatever the decoder consumed during init */
            if (inbytesleft)
                *inbytesleft -= skipped;
            if (inbuf)
                *inbuf += skipped;
        }
    }
    cd->init_flag = 0;

    if (!inbuf || !*inbuf)
    {
        /* flush request: emit the pending character (if any), then let
           the encoder flush; with no output buffer this is a pure reset */
        if (outbuf && *outbuf)
        {
            if (cd->unget_x)
                result = (*cd->encoder.write_handle)(
                    cd, &cd->encoder, cd->unget_x, outbuf, outbytesleft);
            if (cd->encoder.flush_handle)
                result = (*cd->encoder.flush_handle)(
                    cd, &cd->encoder, outbuf, outbytesleft);
        }
        if (result == 0)
            cd->init_flag = 1;
        cd->unget_x = 0;
        return result;
    }

    for (;;)
    {
        unsigned long ch;
        size_t consumed;

        if (cd->unget_x)
        {
            /* retry the character that did not fit last time */
            ch = cd->unget_x;
            consumed = cd->no_read_x;
        }
        else if (*inbytesleft == 0)
        {
            /* all input handled: report how much was consumed */
            result = *inbuf - start;
            break;
        }
        else
        {
            ch = (*cd->decoder.read_handle)(
                cd, &cd->decoder,
                (unsigned char *) *inbuf, *inbytesleft, &consumed);
            if (consumed == 0)
            {
                result = (size_t)(-1);
                break;
            }
        }

        if (ch)
        {
            result = (*cd->encoder.write_handle)(
                cd, &cd->encoder, ch, outbuf, outbytesleft);
            if (result && cd->my_errno == YAZ_ICONV_E2BIG)
            {
                /* unable to write it. save it because read_handle
                   cannot rewind .. */
                cd->unget_x = ch;
                cd->no_read_x = consumed;
                break;
            }
            /* written, or failed for a reason other than lack of
               space: either way the character is no longer pending */
            cd->unget_x = 0;
        }
        *inbytesleft -= consumed;
        *inbuf += consumed;
    }
    return result;
}
278
/**
 * \brief Returns the error code currently stored in the handle
 * \param cd conversion handle
 * \returns value of the handle's my_errno field
 */
int yaz_iconv_error(yaz_iconv_t cd)
{
    const int code = cd->my_errno;

    return code;
}
283
/**
 * \brief Releases a conversion handle and everything attached to it
 * \param cd conversion handle (not checked for 0)
 * \returns always 0
 *
 * Closes the system iconv descriptor when one is open, runs the encoder
 * and decoder destroy handlers when present, then frees the handle.
 */
int yaz_iconv_close(yaz_iconv_t cd)
{
    struct yaz_iconv_encoder_s *enc = &cd->encoder;
    struct yaz_iconv_decoder_s *dec = &cd->decoder;

#if HAVE_ICONV_H
    if (cd->iconv_cd != (iconv_t) (-1))
        iconv_close(cd->iconv_cd);
#endif
    if (enc->destroy_handle)
        (*enc->destroy_handle)(enc);
    if (dec->destroy_handle)
        (*dec->destroy_handle)(dec);

    xfree(cd);
    return 0;
}
297
/**
 * \brief Stores an error code in the handle
 * \param cd conversion handle
 * \param no error code to store; later returned by yaz_iconv_error()
 */
void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
{
    cd->my_errno = no;
    return;
}
302
303 /*
304 * Local variables:
305 * c-basic-offset: 4
306 * c-file-style: "Stroustrup"
307 * indent-tabs-mode: nil
308 * End:
309 * vim: shiftwidth=4 tabstop=8 expandtab
310 */
311
312