1 /* Copyright (C) 1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU IO Library.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2, or (at
7 your option) any later version.
8
9 This library is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this library; see the file COPYING. If not, write to
16 the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
17 MA 02111-1307, USA.
18
19 As a special exception, if you link this library with files
20 compiled with a GNU compiler to produce an executable, this does
21 not cause the resulting executable to be covered by the GNU General
22 Public License. This exception does not however invalidate any
23 other reasons why the executable file might be covered by the GNU
24 General Public License. */
25
26 #include <libioP.h>
27 #ifdef _LIBC
28 # include <dlfcn.h>
29 # include <wchar.h>
30 # include <locale/localeinfo.h>
31 # include <wcsmbs/wcsmbsload.h>
32 # include <iconv/gconv_int.h>
33 #endif
34 #include <stdlib.h>
35 #include <string.h>
36
37 #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
38 # include <langinfo.h>
39 #endif
40
41 #if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
42 /* Prototypes of libio's codecvt functions. */
43 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
44 __c_mbstate_t *statep,
45 const wchar_t *from_start,
46 const wchar_t *from_end,
47 const wchar_t **from_stop, char *to_start,
48 char *to_end, char **to_stop);
49 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
50 __c_mbstate_t *statep, char *to_start,
51 char *to_end, char **to_stop);
52 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
53 __c_mbstate_t *statep,
54 const char *from_start,
55 const char *from_end,
56 const char **from_stop, wchar_t *to_start,
57 wchar_t *to_end, wchar_t **to_stop);
58 static int do_encoding (struct _IO_codecvt *codecvt);
59 static int do_length (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
60 const char *from_start,
61 const char *from_end, _IO_size_t max);
62 static int do_max_length (struct _IO_codecvt *codecvt);
63 static int do_always_noconv (struct _IO_codecvt *codecvt);
64
65
66 /* The functions used in `codecvt' for libio are always the same. */
67 struct _IO_codecvt __libio_codecvt =
68 {
69 .__codecvt_destr = NULL, /* Destructor, never used. */
70 .__codecvt_do_out = do_out,
71 .__codecvt_do_unshift = do_unshift,
72 .__codecvt_do_in = do_in,
73 .__codecvt_do_encoding = do_encoding,
74 .__codecvt_do_always_noconv = do_always_noconv,
75 .__codecvt_do_length = do_length,
76 .__codecvt_do_max_length = do_max_length
77 };
78
79
#ifdef _LIBC
/* Transliteration record installed by _IO_fwide on the output
   (wide -> multibyte) conversion step; it routes through
   __gconv_transliterate.  */
static struct __gconv_trans_data libio_translit =
{
  .__trans_fct = __gconv_transliterate,
};
#endif
#endif /* defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) */
87
88 /* Return orientation of stream. If mode is nonzero try to change
89 the orientation first. */
90 #undef _IO_fwide
91 int
_IO_fwide(fp,mode)92 _IO_fwide (fp, mode)
93 _IO_FILE *fp;
94 int mode;
95 {
96 /* Normalize the value. */
97 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
98
99 if (mode == 0 || fp->_mode != 0)
100 /* The caller simply wants to know about the current orientation
101 or the orientation already has been determined. */
102 return fp->_mode;
103
104 /* Set the orientation appropriately. */
105 if (mode > 0)
106 {
107 #if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
108 struct _IO_codecvt *cc = fp->_codecvt;
109
110 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
111 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
112
113 #ifdef _LIBC
114 /* Get the character conversion functions based on the currently
115 selected locale for LC_CTYPE. */
116 {
117 struct gconv_fcts fcts;
118
119 /* Clear the state. We start all over again. */
120 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__c_mbstate_t));
121 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__c_mbstate_t));
122
123 __wcsmbs_clone_conv (&fcts);
124
125 /* The functions are always the same. */
126 *cc = __libio_codecvt;
127
128 cc->__cd_in.__cd.__nsteps = 1; /* Only one step allowed. */
129 cc->__cd_in.__cd.__steps = fcts.towc;
130
131 cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
132 cc->__cd_in.__cd.__data[0].__internal_use = 1;
133 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
134 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
135
136 /* XXX For now no transliteration. */
137 cc->__cd_in.__cd.__data[0].__trans = NULL;
138
139 cc->__cd_out.__cd.__nsteps = 1; /* Only one step allowed. */
140 cc->__cd_out.__cd.__steps = fcts.tomb;
141
142 cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
143 cc->__cd_out.__cd.__data[0].__internal_use = 1;
144 cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
145 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
146
147 /* And now the transliteration. */
148 cc->__cd_out.__cd.__data[0].__trans = &libio_translit;
149 }
150 #else
151 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
152 {
153 /* Determine internal and external character sets.
154 XXX For now we make our life easy: we assume a fixed internal
155 encoding (as most sane systems have; hi HP/UX!). If somebody
156 cares about systems which changing internal charsets they
157 should come up with a solution for the determination of the
158 currently used internal character set. */
159 #if 0
160 const char *internal_ccs = _G_INTERNAL_CCS;
161 const char *external_ccs = nl_langinfo(CODESET);
162
163 if (external_ccs == NULL)
164 external_ccs = "ISO-8859-1";
165
166 cc->__cd_in = iconv_open (internal_ccs, external_ccs);
167 if (cc->__cd_in != (iconv_t) -1)
168 cc->__cd_out = iconv_open (external_ccs, internal_ccs);
169 #endif
170 }
171 # else
172 # error "somehow determine this from LC_CTYPE"
173 # endif
174 #endif
175
176 /* From now on use the wide character callback functions. */
177 ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
178 #else /* !defined(_GLIBCPP_USE_WCHAR_T) */
179 mode = fp->_mode;
180 #endif /* !defined(_GLIBCPP_USE_WCHAR_T) */
181 }
182
183 /* Set the mode now. */
184 fp->_mode = mode;
185
186 return mode;
187 }
188
189 #ifdef weak_alias
weak_alias(_IO_fwide,fwide)190 weak_alias (_IO_fwide, fwide)
191 #endif
192
193 #if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
194
195 static enum __codecvt_result
196 do_out (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
197 const wchar_t *from_start, const wchar_t *from_end,
198 const wchar_t **from_stop, char *to_start, char *to_end,
199 char **to_stop)
200 {
201 enum __codecvt_result result;
202
203 #ifdef _LIBC
204 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
205 int status;
206 size_t dummy;
207 const unsigned char *from_start_copy = (unsigned char *) from_start;
208
209 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
210 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
211 codecvt->__cd_out.__cd.__data[0].__statep = statep;
212
213 status = DL_CALL_FCT (gs->__fct,
214 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
215 (const unsigned char *) from_end, NULL,
216 &dummy, 0, 0));
217
218 *from_stop = (wchar_t *) from_start_copy;
219 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
220
221 switch (status)
222 {
223 case __GCONV_OK:
224 case __GCONV_EMPTY_INPUT:
225 result = __codecvt_ok;
226 break;
227
228 case __GCONV_FULL_OUTPUT:
229 case __GCONV_INCOMPLETE_INPUT:
230 result = __codecvt_partial;
231 break;
232
233 default:
234 result = __codecvt_error;
235 break;
236 }
237 #else
238 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
239 size_t res;
240 const char *from_start_copy = (const char *) from_start;
241 size_t from_len = from_end - from_start;
242 char *to_start_copy = to_start;
243 size_t to_len = to_end - to_start;
244 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
245 &to_start_copy, &to_len);
246
247 if (res == 0 || from_len == 0)
248 result = __codecvt_ok;
249 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
250 result = __codecvt_partial;
251 else
252 result = __codecvt_error;
253
254 # else
255 /* Decide what to do. */
256 result = __codecvt_error;
257 # endif
258 #endif
259
260 return result;
261 }
262
263
264 static enum __codecvt_result
do_unshift(struct _IO_codecvt * codecvt,__c_mbstate_t * statep,char * to_start,char * to_end,char ** to_stop)265 do_unshift (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
266 char *to_start, char *to_end, char **to_stop)
267 {
268 enum __codecvt_result result;
269
270 #ifdef _LIBC
271 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
272 int status;
273 size_t dummy;
274
275 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
276 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
277 codecvt->__cd_out.__cd.__data[0].__statep = statep;
278
279 status = DL_CALL_FCT (gs->__fct,
280 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
281 NULL, &dummy, 1, 0));
282
283 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
284
285 switch (status)
286 {
287 case __GCONV_OK:
288 case __GCONV_EMPTY_INPUT:
289 result = __codecvt_ok;
290 break;
291
292 case __GCONV_FULL_OUTPUT:
293 case __GCONV_INCOMPLETE_INPUT:
294 result = __codecvt_partial;
295 break;
296
297 default:
298 result = __codecvt_error;
299 break;
300 }
301 #else
302 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
303 size_t res;
304 char *to_start_copy = (char *) to_start;
305 size_t to_len = to_end - to_start;
306
307 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
308
309 if (res == 0)
310 result = __codecvt_ok;
311 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
312 result = __codecvt_partial;
313 else
314 result = __codecvt_error;
315 # else
316 /* Decide what to do. */
317 result = __codecvt_error;
318 # endif
319 #endif
320
321 return result;
322 }
323
324
325 static enum __codecvt_result
do_in(struct _IO_codecvt * codecvt,__c_mbstate_t * statep,const char * from_start,const char * from_end,const char ** from_stop,wchar_t * to_start,wchar_t * to_end,wchar_t ** to_stop)326 do_in (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
327 const char *from_start, const char *from_end, const char **from_stop,
328 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
329 {
330 enum __codecvt_result result;
331
332 #ifdef _LIBC
333 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
334 int status;
335 size_t dummy;
336 const unsigned char *from_start_copy = (unsigned char *) from_start;
337
338 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start;
339 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end;
340 codecvt->__cd_in.__cd.__data[0].__statep = statep;
341
342 status = DL_CALL_FCT (gs->__fct,
343 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
344 from_end, NULL, &dummy, 0, 0));
345
346 *from_stop = from_start_copy;
347 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
348
349 switch (status)
350 {
351 case __GCONV_OK:
352 case __GCONV_EMPTY_INPUT:
353 result = __codecvt_ok;
354 break;
355
356 case __GCONV_FULL_OUTPUT:
357 case __GCONV_INCOMPLETE_INPUT:
358 result = __codecvt_partial;
359 break;
360
361 default:
362 result = __codecvt_error;
363 break;
364 }
365 #else
366 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
367 size_t res;
368 const char *from_start_copy = (const char *) from_start;
369 size_t from_len = from_end - from_start;
370 char *to_start_copy = (char *) from_start;
371 size_t to_len = to_end - to_start;
372
373 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
374 &to_start_copy, &to_len);
375
376 if (res == 0)
377 result = __codecvt_ok;
378 else if (to_len == 0)
379 result = __codecvt_partial;
380 else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
381 result = __codecvt_partial;
382 else
383 result = __codecvt_error;
384 # else
385 /* Decide what to do. */
386 result = __codecvt_error;
387 # endif
388 #endif
389
390 return result;
391 }
392
393
/* Describe the external encoding of CODECVT.

   Returns -1 if the encoding is stateful, 0 if the number of bytes per
   character varies, and otherwise the constant number of bytes needed
   for each wide character.  Without _LIBC nothing is known about the
   encoding and -1 is always returned.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding has no fixed width.  */
  if (step->__stateful)
    return -1;

  /* Stateless: the width is constant only if the minimum and maximum
     number of input bytes per character agree.  */
  return (step->__min_needed_from == step->__max_needed_from
          ? step->__min_needed_from
          : 0);
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
414
415
/* Report whether conversion through CODECVT is always a no-op.
   This implementation never claims so: the result is always 0 and
   CODECVT is not examined.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  (void) codecvt;

  return 0;
}
421
422
423 static int
do_length(struct _IO_codecvt * codecvt,__c_mbstate_t * statep,const char * from_start,const char * from_end,_IO_size_t max)424 do_length (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
425 const char *from_start, const char *from_end, _IO_size_t max)
426 {
427 int result;
428 #ifdef _LIBC
429 const unsigned char *cp = (const unsigned char *) from_start;
430 wchar_t to_buf[max];
431 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
432 int status;
433 size_t dummy;
434
435 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf;
436 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max];
437 codecvt->__cd_in.__cd.__data[0].__statep = statep;
438
439 status = DL_CALL_FCT (gs->__fct,
440 (gs, codecvt->__cd_in.__cd.__data, &cp, from_end,
441 NULL, &dummy, 0, 0));
442
443 result = cp - (const unsigned char *) from_start;
444 #else
445 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
446 const char *from_start_copy = (const char *) from_start;
447 size_t from_len = from_end - from_start;
448 wchar_t to_buf[max];
449 size_t res;
450 char *to_start = (char *) to_buf;
451
452 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
453 &to_start, &max);
454
455 result = from_start_copy - (char *) from_start;
456 # else
457 /* Decide what to do. */
458 result = 0;
459 # endif
460 #endif
461
462 return result;
463 }
464
465
/* Return the largest number of bytes a single character can occupy in
   the external encoding of CODECVT.  With _LIBC this comes from the
   input conversion step; otherwise MB_CUR_MAX is used.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
  int longest;

#ifdef _LIBC
  longest = codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  longest = MB_CUR_MAX;
#endif

  return longest;
}
475
476 #endif /* defined(_GLIBCPP_USE_WCHAR_T) */
477