1 /* winduni.c -- unicode support for the windres program.
2    Copyright 1997, 1998, 2000, 2001, 2003, 2007
3    Free Software Foundation, Inc.
4    Written by Ian Lance Taylor, Cygnus Support.
5    Rewritten by Kai Tietz, Onevision.
6 
7    This file is part of GNU Binutils.
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
22    02110-1301, USA.  */
23 
24 
25 /* This file contains unicode support routines for the windres
26    program.  Ideally, we would have generic unicode support which
27    would work on all systems.  However, we don't.  Instead, on a
28    Windows host, we are prepared to call some Windows routines.  This
29    means that we will generate different output on Windows and Unix
30    hosts, but that seems better than not really supporting unicode at
31    all.  */
32 
33 #include "sysdep.h"
34 #include "bfd.h"
35 #include "libiberty.h" /* for xstrdup */
36 #include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
38 #if defined (_WIN32) || defined (__CYGWIN__)
39 #include <windows.h>
40 #include <winnls.h>
41 #endif
42 #include "winduni.h"
43 #include "safe-ctype.h"
44 
45 #if HAVE_ICONV_H
46 #include <iconv.h>
47 #endif
48 
/* Forward declarations for the file-local helpers that are defined
   further down in this file but used before their definitions.  */
static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
static int unichar_isascii (const unichar *, rc_uint_type);
52 
/* Static codepage and language tables.  They are only compiled on
   non-Windows hosts; the Windows code paths further down call the
   Win32 locale routines instead.  */
55 
56 #if !defined (_WIN32) && !defined (__CYGWIN__)
57 
/* Codepages mapped.  Each entry pairs a Windows codepage number with
   the encoding name that wind_iconv_cp returns and that is handed to
   iconv_open.  The table is searched by wind_find_codepage_info and
   is terminated by an entry whose codepage is (rc_uint_type) -1.

   NOTE(review): the languages table lists codepage 855 (Serbian
   Cyrillic), which has no entry here -- confirm whether one is
   needed.  */
static local_iconv_map codepages[] =
{
  { 0, "MS-ANSI" },
  { 1, "WINDOWS-1252" },
  { 437, "MS-ANSI" },
  { 737, "MS-GREEK" },
  { 775, "WINBALTRIM" },
  { 850, "MS-ANSI" },
  { 852, "MS-EE" },
  { 857, "MS-TURK" },
  { 862, "CP862" },
  { 864, "CP864" },
  { 866, "MS-CYRL" },
  { 874, "WINDOWS-874" },
  { 932, "CP932" },
  { 936, "CP936" },
  { 949, "CP949" },
  { 950, "CP950" },
  { 1250, "WINDOWS-1250" },
  { 1251, "WINDOWS-1251" },
  { 1252, "WINDOWS-1252" },
  { 1253, "WINDOWS-1253" },
  { 1254, "WINDOWS-1254" },
  { 1255, "WINDOWS-1255" },
  { 1256, "WINDOWS-1256" },
  { 1257, "WINDOWS-1257" },
  { 1258, "WINDOWS-1258" },
  { CP_UTF7, "UTF-7" },
  { CP_UTF8, "UTF-8" },
  { CP_UTF16, "UTF-16" },
  /* Terminator.  */
  { (rc_uint_type) -1, NULL }
};
91 
92 /* Languages supported.  */
93 static const wind_language_t languages[] =
94 {
95   { 0x0000, 437, 1252, "Neutral", "Neutral" },
96   { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
97   { 0x0403, 850, 1252, "Catalan", "Spain" },	      { 0x0404, 950,  950, "Chinese", "Taiwan" },
98   { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
99   { 0x0407, 850, 1252, "German", "Germany" },	      { 0x0408, 737, 1253, "Greek", "Greece" },
100   { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
101   { 0x040B, 850, 1252, "Finnish", "Finland" },	      { 0x040C, 850, 1252, "French", "France" },
102   { 0x040D, 862, 1255, "Hebrew", "Israel" },	      { 0x040E, 852, 1250, "Hungarian", "Hungary" },
103   { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
104   { 0x0411, 932,  932, "Japanese", "Japan" },	      { 0x0412, 949,  949, "Korean", "Korea (south)" },
105   { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokm�l)", "Norway" },
106   { 0x0415, 852, 1250, "Polish", "Poland" },	      { 0x0416, 850, 1252, "Portuguese", "Brazil" },
107   { 0x0418, 852, 1250, "Romanian", "Romania" },	      { 0x0419, 866, 1251, "Russian", "Russia" },
108   { 0x041A, 852, 1250, "Croatian", "Croatia" },	      { 0x041B, 852, 1250, "Slovak", "Slovakia" },
109   { 0x041C, 852, 1250, "Albanian", "Albania" },	      { 0x041D, 850, 1252, "Swedish", "Sweden" },
110   { 0x041E, 874,  874, "Thai", "Thailand" },	      { 0x041F, 857, 1254, "Turkish", "Turkey" },
111   { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
112   { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
113   { 0x0425, 775, 1257, "Estonian", "Estonia" },	      { 0x0426, 775, 1257, "Latvian", "Latvia" },
114   { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
115   { 0x0429, 864, 1256, "Arabic", "Farsi" },	      { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
116   { 0x042D, 850, 1252, "Basque", "Spain" },
117   { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
118   { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
119   { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
120   { 0x043C, 437, 1252, "Irish", "Ireland" },
121   { 0x043E, 850, 1252, "Malay", "Malaysia" },
122   { 0x0801, 864, 1256, "Arabic", "Iraq" },
123   { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
124   { 0x0807, 850, 1252, "German", "Switzerland" },
125   { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
126   { 0x080C, 850, 1252, "French", "Belgium" },
127   { 0x0810, 850, 1252, "Italian", "Switzerland" },
128   { 0x0813, 850, 1252, "Dutch", "Belgium" },	      { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
129   { 0x0816, 850, 1252, "Portuguese", "Portugal" },
130   { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
131   { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
132   { 0x0C01, 864, 1256, "Arabic", "Egypt" },
133   { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
134   { 0x0C07, 850, 1252, "German", "Austria" },
135   { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
136   { 0x0C0C, 850, 1252, "French", "Canada"},
137   { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
138   { 0x1001, 864, 1256, "Arabic", "Libya" },
139   { 0x1004, 936,  936, "Chinese", "Singapore" },
140   { 0x1007, 850, 1252, "German", "Luxembourg" },
141   { 0x1009, 850, 1252, "English", "Canada" },
142   { 0x100A, 850, 1252, "Spanish", "Guatemala" },
143   { 0x100C, 850, 1252, "French", "Switzerland" },
144   { 0x1401, 864, 1256, "Arabic", "Algeria" },
145   { 0x1407, 850, 1252, "German", "Liechtenstein" },
146   { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
147   { 0x140C, 850, 1252, "French", "Luxembourg" },
148   { 0x1801, 864, 1256, "Arabic", "Morocco" },
149   { 0x1809, 850, 1252, "English", "Ireland" },	      { 0x180A, 850, 1252, "Spanish", "Panama" },
150   { 0x180C, 850, 1252, "French", "Monaco" },
151   { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
152   { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
153   { 0x2001, 864, 1256, "Arabic", "Oman" },
154   { 0x2009, 850, 1252, "English", "Jamaica" },	      { 0x200A, 850, 1252, "Spanish", "Venezuela" },
155   { 0x2401, 864, 1256, "Arabic", "Yemen" },
156   { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
157   { 0x2801, 864, 1256, "Arabic", "Syria" },
158   { 0x2809, 850, 1252, "English", "Belize" },	      { 0x280A, 850, 1252, "Spanish", "Peru" },
159   { 0x2C01, 864, 1256, "Arabic", "Jordan" },
160   { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
161   { 0x3001, 864, 1256, "Arabic", "Lebanon" },
162   { 0x3009, 437, 1252, "English", "Zimbabwe" },	      { 0x300A, 850, 1252, "Spanish", "Ecuador" },
163   { 0x3401, 864, 1256, "Arabic", "Kuwait" },
164   { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
165   { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
166   { 0x380A, 850, 1252, "Spanish", "Uruguay" },
167   { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
168   { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
169   { 0x4001, 864, 1256, "Arabic", "Qatar" },
170   { 0x400A, 850, 1252, "Spanish", "Bolivia" },
171   { 0x440A, 850, 1252, "Spanish", "El Salvador" },
172   { 0x480A, 850, 1252, "Spanish", "Honduras" },
173   { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
174   { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
175   { (unsigned) -1,  0,      0, NULL, NULL }
176 };
177 
178 #endif
179 
/* Specifies the default codepage to be used for unicode
   transformations.  It is initialized to CP_ACP.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  It is initialized to CP_ACP and is the codepage
   that unicode_from_ascii and ascii_from_unicode pass on to the
   codepage conversion routines.  */
rc_uint_type wind_current_codepage = CP_ACP;
187 
/* Convert the string ASCII to a unicode string, interpreting it in
   the codepage currently selected by wind_current_codepage.  This
   simply forwards to unicode_from_codepage, which stores the result
   in *UNICODE and, when LENGTH is not NULL, its length in *LENGTH.  */

void
unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
{
  unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
}
196 
/* Convert the unicode string UNICODE to a multibyte string in the
   codepage currently selected by wind_current_codepage.  This simply
   forwards to codepage_from_unicode, which stores the result in
   *ASCII and, when LENGTH is not NULL, its length in *LENGTH.  */

void
ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
{
  codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
}
206 
207 /* Print the unicode string UNICODE to the file E.  LENGTH is the
208    number of characters to print, or -1 if we should print until the
209    end of the string.  FIXME: On a Windows host, we should be calling
210    some Windows function, probably WideCharToMultiByte.  */
211 
212 void
213 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
214 {
215   while (1)
216     {
217       unichar ch;
218 
219       if (length == 0)
220 	return;
221       if ((bfd_signed_vma) length > 0)
222 	--length;
223 
224       ch = *unicode;
225 
226       if (ch == 0 && (bfd_signed_vma) length < 0)
227 	return;
228 
229       ++unicode;
230 
231       if ((ch & 0x7f) == ch)
232 	{
233 	  if (ch == '\\')
234 	    fputs ("\\\\", e);
235 	  else if (ch == '"')
236 	    fputs ("\"\"", e);
237 	  else if (ISPRINT (ch))
238 	    putc (ch, e);
239 	  else
240 	    {
241 	      switch (ch)
242 		{
243 		case ESCAPE_A:
244 		  fputs ("\\a", e);
245 		  break;
246 
247 		case ESCAPE_B:
248 		  fputs ("\\b", e);
249 		  break;
250 
251 		case ESCAPE_F:
252 		  fputs ("\\f", e);
253 		  break;
254 
255 		case ESCAPE_N:
256 		  fputs ("\\n", e);
257 		  break;
258 
259 		case ESCAPE_R:
260 		  fputs ("\\r", e);
261 		  break;
262 
263 		case ESCAPE_T:
264 		  fputs ("\\t", e);
265 		  break;
266 
267 		case ESCAPE_V:
268 		  fputs ("\\v", e);
269 		  break;
270 
271 		default:
272 		  fprintf (e, "\\%03o", (unsigned int) ch);
273 		  break;
274 		}
275 	    }
276 	}
277       else if ((ch & 0xff) == ch)
278 	fprintf (e, "\\%03o", (unsigned int) ch);
279       else
280 	fprintf (e, "\\x%04x", (unsigned int) ch);
281     }
282 }
283 
284 /* Print a unicode string to a file.  */
285 
286 void
287 ascii_print (FILE *e, const char *s, rc_uint_type length)
288 {
289   while (1)
290     {
291       char ch;
292 
293       if (length == 0)
294 	return;
295       if ((bfd_signed_vma) length > 0)
296 	--length;
297 
298       ch = *s;
299 
300       if (ch == 0 && (bfd_signed_vma) length < 0)
301 	return;
302 
303       ++s;
304 
305       if ((ch & 0x7f) == ch)
306 	{
307 	  if (ch == '\\')
308 	    fputs ("\\\\", e);
309 	  else if (ch == '"')
310 	    fputs ("\"\"", e);
311 	  else if (ISPRINT (ch))
312 	    putc (ch, e);
313 	  else
314 	    {
315 	      switch (ch)
316 		{
317 		case ESCAPE_A:
318 		  fputs ("\\a", e);
319 		  break;
320 
321 		case ESCAPE_B:
322 		  fputs ("\\b", e);
323 		  break;
324 
325 		case ESCAPE_F:
326 		  fputs ("\\f", e);
327 		  break;
328 
329 		case ESCAPE_N:
330 		  fputs ("\\n", e);
331 		  break;
332 
333 		case ESCAPE_R:
334 		  fputs ("\\r", e);
335 		  break;
336 
337 		case ESCAPE_T:
338 		  fputs ("\\t", e);
339 		  break;
340 
341 		case ESCAPE_V:
342 		  fputs ("\\v", e);
343 		  break;
344 
345 		default:
346 		  fprintf (e, "\\%03o", (unsigned int) ch);
347 		  break;
348 		}
349 	    }
350 	}
351       else
352 	fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
353     }
354 }
355 
356 rc_uint_type
357 unichar_len (const unichar *unicode)
358 {
359   rc_uint_type r = 0;
360 
361   if (unicode)
362     while (unicode[r] != 0)
363       r++;
364   else
365     --r;
366   return r;
367 }
368 
369 unichar *
370 unichar_dup (const unichar *unicode)
371 {
372   unichar *r;
373   int len;
374 
375   if (! unicode)
376     return NULL;
377   for (len = 0; unicode[len] != 0; ++len)
378     ;
379   ++len;
380   r = ((unichar *) res_alloc (len * sizeof (unichar)));
381   memcpy (r, unicode, len * sizeof (unichar));
382   return r;
383 }
384 
385 unichar *
386 unichar_dup_uppercase (const unichar *u)
387 {
388   unichar *r = unichar_dup (u);
389   int i;
390 
391   if (! r)
392     return NULL;
393 
394   for (i = 0; r[i] != 0; ++i)
395     {
396       if (r[i] >= 'a' && r[i] <= 'z')
397 	r[i] &= 0xdf;
398     }
399   return r;
400 }
401 
402 static int
403 unichar_isascii (const unichar *u, rc_uint_type len)
404 {
405   rc_uint_type i;
406 
407   if ((bfd_signed_vma) len < 0)
408     {
409       if (u)
410 	len = (rc_uint_type) unichar_len (u);
411       else
412 	len = 0;
413     }
414 
415   for (i = 0; i < len; i++)
416     if ((u[i] & 0xff80) != 0)
417       return 0;
418   return 1;
419 }
420 
421 void
422 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
423 {
424   if (! unichar_isascii (u, len))
425     fputc ('L', e);
426   fputc ('"', e);
427   unicode_print (e, u, len);
428   fputc ('"', e);
429 }
430 
431 int
432 unicode_is_valid_codepage (rc_uint_type cp)
433 {
434   if ((cp & 0xffff) != cp)
435     return 0;
436   if (cp == CP_UTF16 || cp == CP_ACP)
437     return 1;
438 
439 #if !defined (_WIN32) && !defined (__CYGWIN__)
440   if (! wind_find_codepage_info (cp))
441     return 0;
442   return 1;
443 #else
444   return !! IsValidCodePage ((UINT) cp);
445 #endif
446 }
447 
448 #if defined (_WIN32) || defined (__CYGWIN__)
449 
450 #define max_cp_string_len 6
451 
452 static unsigned int
453 codepage_from_langid (unsigned short langid)
454 {
455   char cp_string [max_cp_string_len];
456   int c;
457 
458   memset (cp_string, 0, max_cp_string_len);
459   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
460      but is unavailable on Win95.  */
461   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
462   		      LOCALE_IDEFAULTANSICODEPAGE,
463   		      cp_string, max_cp_string_len);
464   /* If codepage data for an LCID is not installed on users's system,
465      GetLocaleInfo returns an empty string.  Fall back to system ANSI
466      default. */
467   if (c == 0)
468     return CP_ACP;
469   return strtoul (cp_string, 0, 10);
470 }
471 
472 static unsigned int
473 wincodepage_from_langid (unsigned short langid)
474 {
475   char cp_string [max_cp_string_len];
476   int c;
477 
478   memset (cp_string, 0, max_cp_string_len);
479   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
480      but is unavailable on Win95.  */
481   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
482 		      LOCALE_IDEFAULTCODEPAGE,
483 		      cp_string, max_cp_string_len);
484   /* If codepage data for an LCID is not installed on users's system,
485      GetLocaleInfo returns an empty string.  Fall back to system ANSI
486      default. */
487   if (c == 0)
488     return CP_OEM;
489   return strtoul (cp_string, 0, 10);
490 }
491 
492 static char *
493 lang_from_langid (unsigned short langid)
494 {
495   char cp_string[261];
496   int c;
497 
498   memset (cp_string, 0, 261);
499   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
500   		      LOCALE_SENGLANGUAGE,
501   		      cp_string, 260);
502   /* If codepage data for an LCID is not installed on users's system,
503      GetLocaleInfo returns an empty string.  Fall back to system ANSI
504      default. */
505   if (c == 0)
506     strcpy (cp_string, "Neutral");
507   return xstrdup (cp_string);
508 }
509 
510 static char *
511 country_from_langid (unsigned short langid)
512 {
513   char cp_string[261];
514   int c;
515 
516   memset (cp_string, 0, 261);
517   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
518   		      LOCALE_SENGCOUNTRY,
519   		      cp_string, 260);
520   /* If codepage data for an LCID is not installed on users's system,
521      GetLocaleInfo returns an empty string.  Fall back to system ANSI
522      default. */
523   if (c == 0)
524     strcpy (cp_string, "Neutral");
525   return xstrdup (cp_string);
526 }
527 
528 #endif
529 
530 const wind_language_t *
531 wind_find_language_by_id (unsigned id)
532 {
533 #if !defined (_WIN32) && !defined (__CYGWIN__)
534   int i;
535 
536   if (! id)
537     return NULL;
538   for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
539     ;
540   if (languages[i].id == id)
541     return &languages[i];
542   return NULL;
543 #else
544   static wind_language_t wl;
545 
546   wl.id = id;
547   wl.doscp = codepage_from_langid ((unsigned short) id);
548   wl.wincp = wincodepage_from_langid ((unsigned short) id);
549   wl.name = lang_from_langid ((unsigned short) id);
550   wl.country = country_from_langid ((unsigned short) id);
551 
552   return & wl;
553 #endif
554 }
555 
556 const local_iconv_map *
557 wind_find_codepage_info (unsigned cp)
558 {
559 #if !defined (_WIN32) && !defined (__CYGWIN__)
560   int i;
561 
562   for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
563     ;
564   if (codepages[i].codepage == (rc_uint_type) -1)
565     return NULL;
566   return &codepages[i];
567 #else
568   static local_iconv_map lim;
569   if (!unicode_is_valid_codepage (cp))
570   	return NULL;
571   lim.codepage = cp;
572   lim.iconv_name = "";
573   return & lim;
574 #endif
575 }
576 
577 /* Convert an Codepage string to a unicode string.  */
578 
579 void
580 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
581 {
582   rc_uint_type len;
583 
584   len = wind_MultiByteToWideChar (cp, src, NULL, 0);
585   if (len)
586     {
587       *u = ((unichar *) res_alloc (len));
588       wind_MultiByteToWideChar (cp, src, *u, len);
589     }
590   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
591      this will set *length to -1.  */
592   len -= sizeof (unichar);
593 
594   if (length != NULL)
595     *length = len / sizeof (unichar);
596 }
597 
598 /* Convert an unicode string to an codepage string.  */
599 
600 void
601 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
602 {
603   rc_uint_type len;
604 
605   len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
606   if (len)
607     {
608       *ascii = (char *) res_alloc (len * sizeof (char));
609       wind_WideCharToMultiByte (cp, unicode, *ascii, len);
610     }
611   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
612      this will set *length to -1.  */
613   len--;
614 
615   if (length != NULL)
616     *length = len;
617 }
618 
619 #ifdef HAVE_ICONV_H
/* Convert one character from S with the conversion descriptor CD,
   writing the result to D, which has room for D_LEN bytes.  As the
   byte length of the next input character is not known, iconv is tried
   on 1, 2, ... up to 32 input bytes until a call succeeds.

   On success 0 is returned, *N_S points just past the consumed input
   and *N_D just past the produced output.  If no attempt succeeds, 1
   is returned and *N_S and *N_D are left untouched.  */

static int
iconv_onechar (iconv_t cd, const char *s, char *d, int d_len, const char **n_s, char **n_d)
{
  int i;

  for (i = 1; i <= 32; i++)
    {
      char *tmp_d = d;
      const char *tmp_s = s;
      size_t ret;
      size_t s_left = (size_t) i;
      size_t d_left = (size_t) d_len;

      /* iconv takes "char **" on POSIX systems and "const char **" on
	 some older ones.  Passing the address through "void *" is
	 accepted by either prototype.  */
      ret = iconv (cd, (void *) & tmp_s, & s_left, & tmp_d, & d_left);

      if (ret != (size_t) -1)
	{
	  *n_s = tmp_s;
	  *n_d = tmp_d;
	  return 0;
	}
    }

  return 1;
}
645 
646 static const char *
647 wind_iconv_cp (rc_uint_type cp)
648 {
649   const local_iconv_map *lim = wind_find_codepage_info (cp);
650 
651   if (!lim)
652     return NULL;
653   return lim->iconv_name;
654 }
655 #endif /* HAVE_ICONV_H */
656 
657 static rc_uint_type
658 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
659 			  unichar *u, rc_uint_type u_len)
660 {
661   rc_uint_type ret = 0;
662 
663 #if defined (_WIN32) || defined (__CYGWIN__)
664   ret = (rc_uint_type) MultiByteToWideChar (cp, MB_PRECOMPOSED,
665 					    mb, -1, u, u_len);
666   /* Convert to bytes. */
667   ret *= sizeof (unichar);
668 
669 #elif defined (HAVE_ICONV_H)
670   int first = 1;
671   char tmp[32];
672   char *p_tmp;
673   const char *iconv_name = wind_iconv_cp (cp);
674 
675   if (!mb || !iconv_name)
676     return 0;
677   iconv_t cd = iconv_open ("UTF-16", iconv_name);
678 
679   while (1)
680     {
681       int iret;
682       const char *n_mb;
683       char *n_tmp;
684 
685       p_tmp = tmp;
686       iret = iconv_onechar (cd, (const char *) mb, p_tmp, 32, & n_mb, & n_tmp);
687       if (first)
688 	{
689 	  first = 0;
690 	  continue;
691 	}
692       if (!iret)
693 	{
694 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
695 
696 	  if (u)
697 	    {
698 	      if ((size_t) u_len < l_tmp)
699 		break;
700 	      memcpy (u, tmp, l_tmp);
701 	      u += l_tmp/2;
702 	      u_len -= l_tmp;
703 	    }
704 	  ret += l_tmp;
705 	}
706       else
707 	break;
708       if (tmp[0] == 0 && tmp[1] == 0)
709 	break;
710       mb = n_mb;
711     }
712   iconv_close (cd);
713 #else
714   if (cp)
715     ret = 0;
716   ret = strlen (mb) + 1;
717   ret *= sizeof (unichar);
718   if (u != NULL && u_len != 0)
719     {
720       do
721 	{
722 	  *u++ = ((unichar) *mb) & 0xff;
723 	  --u_len; mb++;
724 	}
725       while (u_len != 0 && mb[-1] != 0);
726     }
727   if (u != NULL && u_len != 0)
728     *u = 0;
729 #endif
730   return ret;
731 }
732 
733 static rc_uint_type
734 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
735 {
736   rc_uint_type ret = 0;
737 #if defined (_WIN32) || defined (__CYGWIN__)
738   WINBOOL used_def = FALSE;
739 
740   ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
741 				      	    NULL, & used_def);
742 #elif defined (HAVE_ICONV_H)
743   int first = 1;
744   char tmp[32];
745   char *p_tmp;
746   const char *iconv_name = wind_iconv_cp (cp);
747 
748   if (!u || !iconv_name)
749     return 0;
750   iconv_t cd = iconv_open (iconv_name, "UTF-16");
751 
752   while (1)
753     {
754       int iret;
755       const char *n_u;
756       char *n_tmp;
757 
758       p_tmp = tmp;
759       iret = iconv_onechar (cd, (const char *) u, p_tmp, 32, &n_u, & n_tmp);
760       if (first)
761 	{
762 	  first = 0;
763 	  continue;
764 	}
765       if (!iret)
766 	{
767 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
768 
769 	  if (mb)
770 	    {
771 	      if ((size_t) mb_len < l_tmp)
772 		break;
773 	      memcpy (mb, tmp, l_tmp);
774 	      mb += l_tmp;
775 	      mb_len -= l_tmp;
776 	    }
777 	  ret += l_tmp;
778 	}
779       else
780 	break;
781       if (u[0] == 0)
782 	break;
783       u = (const unichar *) n_u;
784     }
785   iconv_close (cd);
786 #else
787   if (cp)
788     ret = 0;
789 
790   while (u[ret] != 0)
791     ++ret;
792 
793   ++ret;
794 
795   if (mb)
796     {
797       while (*u != 0 && mb_len != 0)
798 	{
799 	  if (u[0] == (u[0] & 0x7f))
800 	    *mb++ = (char) u[0];
801 	  else
802 	    *mb++ = '_';
803 	  ++u; --mb_len;
804 	}
805       if (mb_len != 0)
806 	*mb = 0;
807     }
808 #endif
809   return ret;
810 }
811