1 /* Copyright (C) 1999, 2000 Free Software Foundation, Inc.
2    This file is part of the GNU IO Library.
3 
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License as
6    published by the Free Software Foundation; either version 2, or (at
7    your option) any later version.
8 
9    This library is distributed in the hope that it will be useful, but
10    WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this library; see the file COPYING.  If not, write to
16    the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
17    MA 02111-1307, USA.
18 
19    As a special exception, if you link this library with files
20    compiled with a GNU compiler to produce an executable, this does
21    not cause the resulting executable to be covered by the GNU General
22    Public License.  This exception does not however invalidate any
23    other reasons why the executable file might be covered by the GNU
24    General Public License.  */
25 
26 #include <libioP.h>
27 #ifdef _LIBC
28 # include <dlfcn.h>
29 # include <wchar.h>
30 # include <locale/localeinfo.h>
31 # include <wcsmbs/wcsmbsload.h>
32 # include <iconv/gconv_int.h>
33 #endif
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
38 # include <langinfo.h>
39 #endif
40 
41 #if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
42 /* Prototypes of libio's codecvt functions.  */
43 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
44 				     __c_mbstate_t *statep,
45 				     const wchar_t *from_start,
46 				     const wchar_t *from_end,
47 				     const wchar_t **from_stop, char *to_start,
48 				     char *to_end, char **to_stop);
49 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
50 					 __c_mbstate_t *statep, char *to_start,
51 					 char *to_end, char **to_stop);
52 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
53 				    __c_mbstate_t *statep,
54 				    const char *from_start,
55 				    const char *from_end,
56 				    const char **from_stop, wchar_t *to_start,
57 				    wchar_t *to_end, wchar_t **to_stop);
58 static int do_encoding (struct _IO_codecvt *codecvt);
59 static int do_length (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
60 		      const char *from_start,
61 		      const char *from_end, _IO_size_t max);
62 static int do_max_length (struct _IO_codecvt *codecvt);
63 static int do_always_noconv (struct _IO_codecvt *codecvt);
64 
65 
66 /* The functions used in `codecvt' for libio are always the same.  */
67 struct _IO_codecvt __libio_codecvt =
68 {
69   .__codecvt_destr = NULL,		/* Destructor, never used.  */
70   .__codecvt_do_out = do_out,
71   .__codecvt_do_unshift = do_unshift,
72   .__codecvt_do_in = do_in,
73   .__codecvt_do_encoding = do_encoding,
74   .__codecvt_do_always_noconv = do_always_noconv,
75   .__codecvt_do_length = do_length,
76   .__codecvt_do_max_length = do_max_length
77 };
78 
79 
#ifdef _LIBC
/* Transliteration descriptor whose handler is __gconv_transliterate;
   _IO_fwide attaches it to the output conversion step.  */
static struct __gconv_trans_data libio_translit =
  { .__trans_fct = __gconv_transliterate };
#endif
#endif /* defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) */
87 
88 /* Return orientation of stream.  If mode is nonzero try to change
89    the orientation first.  */
90 #undef _IO_fwide
91 int
_IO_fwide(fp,mode)92 _IO_fwide (fp, mode)
93      _IO_FILE *fp;
94      int mode;
95 {
96   /* Normalize the value.  */
97   mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
98 
99   if (mode == 0 || fp->_mode != 0)
100     /* The caller simply wants to know about the current orientation
101        or the orientation already has been determined.  */
102     return fp->_mode;
103 
104   /* Set the orientation appropriately.  */
105   if (mode > 0)
106     {
107 #if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
108       struct _IO_codecvt *cc = fp->_codecvt;
109 
110       fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
111       fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
112 
113 #ifdef _LIBC
114       /* Get the character conversion functions based on the currently
115 	 selected locale for LC_CTYPE.  */
116       {
117 	struct gconv_fcts fcts;
118 
119 	/* Clear the state.  We start all over again.  */
120 	memset (&fp->_wide_data->_IO_state, '\0', sizeof (__c_mbstate_t));
121 	memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__c_mbstate_t));
122 
123 	__wcsmbs_clone_conv (&fcts);
124 
125 	/* The functions are always the same.  */
126 	*cc = __libio_codecvt;
127 
128 	cc->__cd_in.__cd.__nsteps = 1; /* Only one step allowed.  */
129 	cc->__cd_in.__cd.__steps = fcts.towc;
130 
131 	cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
132 	cc->__cd_in.__cd.__data[0].__internal_use = 1;
133 	cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
134 	cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
135 
136 	/* XXX For now no transliteration.  */
137 	cc->__cd_in.__cd.__data[0].__trans = NULL;
138 
139 	cc->__cd_out.__cd.__nsteps = 1; /* Only one step allowed.  */
140 	cc->__cd_out.__cd.__steps = fcts.tomb;
141 
142 	cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
143 	cc->__cd_out.__cd.__data[0].__internal_use = 1;
144 	cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
145 	cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
146 
147 	/* And now the transliteration.  */
148 	cc->__cd_out.__cd.__data[0].__trans = &libio_translit;
149       }
150 #else
151 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
152       {
153 	/* Determine internal and external character sets.
154 	   XXX For now we make our life easy: we assume a fixed internal
155 	   encoding (as most sane systems have; hi HP/UX!).  If somebody
156 	   cares about systems which changing internal charsets they
157 	   should come up with a solution for the determination of the
158 	   currently used internal character set.  */
159 #if 0
160 	const char *internal_ccs = _G_INTERNAL_CCS;
161 	const char *external_ccs = nl_langinfo(CODESET);
162 
163 	if (external_ccs == NULL)
164 	  external_ccs = "ISO-8859-1";
165 
166 	cc->__cd_in = iconv_open (internal_ccs, external_ccs);
167 	if (cc->__cd_in != (iconv_t) -1)
168 	  cc->__cd_out = iconv_open (external_ccs, internal_ccs);
169 #endif
170       }
171 # else
172 #  error "somehow determine this from LC_CTYPE"
173 # endif
174 #endif
175 
176       /* From now on use the wide character callback functions.  */
177       ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
178 #else /* !defined(_GLIBCPP_USE_WCHAR_T) */
179       mode = fp->_mode;
180 #endif /* !defined(_GLIBCPP_USE_WCHAR_T) */
181     }
182 
183   /* Set the mode now.  */
184   fp->_mode = mode;
185 
186   return mode;
187 }
188 
/* Export the function under its standard name as a weak symbol when
   the build provides weak_alias.  */
#if defined weak_alias
weak_alias (_IO_fwide, fwide)
#endif
192 
193 #if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
194 
195 static enum __codecvt_result
196 do_out (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
197 	const wchar_t *from_start, const wchar_t *from_end,
198 	const wchar_t **from_stop, char *to_start, char *to_end,
199 	char **to_stop)
200 {
201   enum __codecvt_result result;
202 
203 #ifdef _LIBC
204   struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
205   int status;
206   size_t dummy;
207   const unsigned char *from_start_copy = (unsigned char *) from_start;
208 
209   codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
210   codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
211   codecvt->__cd_out.__cd.__data[0].__statep = statep;
212 
213   status = DL_CALL_FCT (gs->__fct,
214 			(gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
215 			 (const unsigned char *) from_end, NULL,
216 			 &dummy, 0, 0));
217 
218   *from_stop = (wchar_t *) from_start_copy;
219   *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
220 
221   switch (status)
222     {
223     case __GCONV_OK:
224     case __GCONV_EMPTY_INPUT:
225       result = __codecvt_ok;
226       break;
227 
228     case __GCONV_FULL_OUTPUT:
229     case __GCONV_INCOMPLETE_INPUT:
230       result = __codecvt_partial;
231       break;
232 
233     default:
234       result = __codecvt_error;
235       break;
236     }
237 #else
238 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
239   size_t res;
240   const char *from_start_copy = (const char *) from_start;
241   size_t from_len = from_end - from_start;
242   char *to_start_copy = to_start;
243   size_t to_len = to_end - to_start;
244   res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
245 	       &to_start_copy, &to_len);
246 
247   if (res == 0 || from_len == 0)
248     result = __codecvt_ok;
249   else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
250     result = __codecvt_partial;
251   else
252     result = __codecvt_error;
253 
254 # else
255   /* Decide what to do.  */
256   result = __codecvt_error;
257 # endif
258 #endif
259 
260   return result;
261 }
262 
263 
264 static enum __codecvt_result
do_unshift(struct _IO_codecvt * codecvt,__c_mbstate_t * statep,char * to_start,char * to_end,char ** to_stop)265 do_unshift (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
266 	    char *to_start, char *to_end, char **to_stop)
267 {
268   enum __codecvt_result result;
269 
270 #ifdef _LIBC
271   struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
272   int status;
273   size_t dummy;
274 
275   codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
276   codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
277   codecvt->__cd_out.__cd.__data[0].__statep = statep;
278 
279   status = DL_CALL_FCT (gs->__fct,
280 			(gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
281 			 NULL, &dummy, 1, 0));
282 
283   *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
284 
285   switch (status)
286     {
287     case __GCONV_OK:
288     case __GCONV_EMPTY_INPUT:
289       result = __codecvt_ok;
290       break;
291 
292     case __GCONV_FULL_OUTPUT:
293     case __GCONV_INCOMPLETE_INPUT:
294       result = __codecvt_partial;
295       break;
296 
297     default:
298       result = __codecvt_error;
299       break;
300     }
301 #else
302 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
303   size_t res;
304   char *to_start_copy = (char *) to_start;
305   size_t to_len = to_end - to_start;
306 
307   res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
308 
309   if (res == 0)
310     result = __codecvt_ok;
311   else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
312     result = __codecvt_partial;
313   else
314     result = __codecvt_error;
315 # else
316   /* Decide what to do.  */
317   result = __codecvt_error;
318 # endif
319 #endif
320 
321   return result;
322 }
323 
324 
325 static enum __codecvt_result
do_in(struct _IO_codecvt * codecvt,__c_mbstate_t * statep,const char * from_start,const char * from_end,const char ** from_stop,wchar_t * to_start,wchar_t * to_end,wchar_t ** to_stop)326 do_in (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
327        const char *from_start, const char *from_end, const char **from_stop,
328        wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
329 {
330   enum __codecvt_result result;
331 
332 #ifdef _LIBC
333   struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
334   int status;
335   size_t dummy;
336   const unsigned char *from_start_copy = (unsigned char *) from_start;
337 
338   codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start;
339   codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end;
340   codecvt->__cd_in.__cd.__data[0].__statep = statep;
341 
342   status = DL_CALL_FCT (gs->__fct,
343 			(gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
344 			 from_end, NULL, &dummy, 0, 0));
345 
346   *from_stop = from_start_copy;
347   *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
348 
349   switch (status)
350     {
351     case __GCONV_OK:
352     case __GCONV_EMPTY_INPUT:
353       result = __codecvt_ok;
354       break;
355 
356     case __GCONV_FULL_OUTPUT:
357     case __GCONV_INCOMPLETE_INPUT:
358       result = __codecvt_partial;
359       break;
360 
361     default:
362       result = __codecvt_error;
363       break;
364     }
365 #else
366 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
367   size_t res;
368   const char *from_start_copy = (const char *) from_start;
369   size_t from_len = from_end - from_start;
370   char *to_start_copy = (char *) from_start;
371   size_t to_len = to_end - to_start;
372 
373   res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
374 	       &to_start_copy, &to_len);
375 
376   if (res == 0)
377     result = __codecvt_ok;
378   else if (to_len == 0)
379     result = __codecvt_partial;
380   else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
381     result = __codecvt_partial;
382   else
383     result = __codecvt_error;
384 # else
385   /* Decide what to do.  */
386   result = __codecvt_error;
387 # endif
388 #endif
389 
390   return result;
391 }
392 
393 
/* Describe the external encoding: -1 if it is stateful, 0 if the
   number of bytes per character varies, otherwise the constant number
   of bytes each character needs.  Without _LIBC nothing is known about
   the encoding and -1 is returned.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding cannot be described by a byte count.  */
  if (step->__stateful)
    return -1;

  /* Report the width when it is constant, 0 when it varies.  */
  return (step->__min_needed_from == step->__max_needed_from
          ? step->__min_needed_from
          : 0);
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
414 
415 
/* Report whether conversion is a no-op.  The answer is always 0,
   whatever CODECVT is.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  (void) codecvt;   /* The argument is not inspected.  */
  return 0;
}
421 
422 
423 static int
do_length(struct _IO_codecvt * codecvt,__c_mbstate_t * statep,const char * from_start,const char * from_end,_IO_size_t max)424 do_length (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
425 	   const char *from_start, const char *from_end, _IO_size_t max)
426 {
427   int result;
428 #ifdef _LIBC
429   const unsigned char *cp = (const unsigned char *) from_start;
430   wchar_t to_buf[max];
431   struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
432   int status;
433   size_t dummy;
434 
435   codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf;
436   codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max];
437   codecvt->__cd_in.__cd.__data[0].__statep = statep;
438 
439   status = DL_CALL_FCT (gs->__fct,
440 			(gs, codecvt->__cd_in.__cd.__data, &cp, from_end,
441 			 NULL, &dummy, 0, 0));
442 
443   result = cp - (const unsigned char *) from_start;
444 #else
445 # if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
446   const char *from_start_copy = (const char *) from_start;
447   size_t from_len = from_end - from_start;
448   wchar_t to_buf[max];
449   size_t res;
450   char *to_start = (char *) to_buf;
451 
452   res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
453 	       &to_start, &max);
454 
455   result = from_start_copy - (char *) from_start;
456 # else
457   /* Decide what to do.  */
458   result = 0;
459 # endif
460 #endif
461 
462   return result;
463 }
464 
465 
/* Return the largest number of external bytes a single character may
   need: the step's __max_needed_from under _LIBC, MB_CUR_MAX
   otherwise.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  return step->__max_needed_from;
#else
  (void) codecvt;   /* Only the locale is consulted here.  */
  return (int) MB_CUR_MAX;
#endif
}
475 
476 #endif /* defined(_GLIBCPP_USE_WCHAR_T) */
477