xref: /dragonfly/contrib/gdb-7/gdb/charset.c (revision fcf53d9b)
1 /* Character set conversion support for GDB.
2 
3    Copyright (C) 2001, 2003, 2007, 2008, 2009 Free Software Foundation, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "defs.h"
21 #include "charset.h"
22 #include "gdbcmd.h"
23 #include "gdb_assert.h"
24 #include "gdb_obstack.h"
25 #include "gdb_wait.h"
26 #include "charset-list.h"
27 #include "vec.h"
28 
29 #include <stddef.h>
30 #include "gdb_string.h"
31 #include <ctype.h>
32 
33 
34 /* How GDB's character set support works
35 
36    GDB has three global settings:
37 
38    - The `current host character set' is the character set GDB should
39      use in talking to the user, and which (hopefully) the user's
40      terminal knows how to display properly.  Most users should not
41      change this.
42 
43    - The `current target character set' is the character set the
44      program being debugged uses.
45 
46    - The `current target wide character set' is the wide character set
47      the program being debugged uses, that is, the encoding used for
48      wchar_t.
49 
50    There are commands to set each of these, and mechanisms for
51    choosing reasonable default values.  GDB has a global list of
52    character sets that it can use as its host or target character
53    sets.
54 
55    The header file `charset.h' declares various functions that
56    different pieces of GDB need to perform tasks like:
57 
58    - printing target strings and characters to the user's terminal
59      (mostly target->host conversions),
60 
61    - building target-appropriate representations of strings and
62      characters the user enters in expressions (mostly host->target
63      conversions),
64 
65      and so on.
66 
67    To avoid excessive code duplication and maintenance efforts,
68    GDB simply requires a capable iconv function.  Users on platforms
69    without a suitable iconv can use the GNU iconv library.  */
70 
71 
72 #ifdef PHONY_ICONV
73 
74 /* Provide a phony iconv that does as little as possible.  Also,
75    arrange for there to be a single available character set.  */
76 
/* Force every default charset to the single one the phony iconv
   supports.  Each default is #undef'd first so that a value supplied
   by the build configuration cannot clash with the definitions
   below.  */
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#undef GDB_DEFAULT_TARGET_CHARSET
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#undef GDB_DEFAULT_TARGET_WIDE_CHARSET
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* Drop any macro versions of the iconv interface so the functions
   defined below can use the plain names.  The phony descriptor is an
   int.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#undef iconv
#undef iconv_close

/* The phony iconv takes a `const char **' input buffer.  */
#undef ICONV_CONST
#define ICONV_CONST const
92 
/* EILSEQ is missing on some systems.  Supply a stand-in, but do not
   use EINVAL for it: callers of `iconv' need to tell EINVAL and
   EILSEQ apart.  libiconv's iconv.h makes the same choice.  wchar.h
   may define EILSEQ too, so this check has to come after wchar.h has
   been included (which happens in defs.h through gdb_wchar.h).  */
#if !defined (EILSEQ)
#define EILSEQ ENOENT
#endif
101 
102 iconv_t
103 iconv_open (const char *to, const char *from)
104 {
105   /* We allow conversions from UCS-4BE, wchar_t, and the host charset.
106      We allow conversions to wchar_t and the host charset.  */
107   if (strcmp (from, "UCS-4BE") && strcmp (from, "wchar_t")
108       && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
109     return -1;
110   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
111     return -1;
112 
113   /* Return 1 if we are converting from UCS-4BE, 0 otherwise.  This is
114      used as a flag in calls to iconv.  */
115   return !strcmp (from, "UCS-4BE");
116 }
117 
/* Phony iconv_close.  The phony descriptor ARG owns no resources, so
   there is nothing to release; always returns 0.  */

int
iconv_close (iconv_t arg)
{
  return 0;
}
123 
124 size_t
125 iconv (iconv_t ucs_flag, const char **inbuf, size_t *inbytesleft,
126        char **outbuf, size_t *outbytesleft)
127 {
128   if (ucs_flag)
129     {
130       while (*inbytesleft >= 4)
131 	{
132 	  size_t j;
133 	  unsigned long c = 0;
134 
135 	  for (j = 0; j < 4; ++j)
136 	    {
137 	      c <<= 8;
138 	      c += (*inbuf)[j] & 0xff;
139 	    }
140 
141 	  if (c >= 256)
142 	    {
143 	      errno = EILSEQ;
144 	      return -1;
145 	    }
146 	  **outbuf = c & 0xff;
147 	  ++*outbuf;
148 	  --*outbytesleft;
149 
150 	  ++*inbuf;
151 	  *inbytesleft -= 4;
152 	}
153       if (*inbytesleft < 4)
154 	{
155 	  errno = EINVAL;
156 	  return -1;
157 	}
158     }
159   else
160     {
161       /* In all other cases we simply copy input bytes to the
162 	 output.  */
163       size_t amt = *inbytesleft;
164       if (amt > *outbytesleft)
165 	amt = *outbytesleft;
166       memcpy (*outbuf, *inbuf, amt);
167       *inbuf += amt;
168       *outbuf += amt;
169       *inbytesleft -= amt;
170       *outbytesleft -= amt;
171     }
172 
173   if (*inbytesleft)
174     {
175       errno = E2BIG;
176       return -1;
177     }
178 
179   /* The number of non-reversible conversions -- but they were all
180      reversible.  */
181   return 0;
182 }
183 
184 #endif
185 
186 
187 
188 /* The global lists of character sets and translations.  */
189 
190 
/* Fallback defaults, used only when nothing earlier has defined
   them.  */
#if !defined (GDB_DEFAULT_TARGET_CHARSET)
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

#if !defined (GDB_DEFAULT_TARGET_WIDE_CHARSET)
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UCS-4"
#endif
198 
199 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
200 static const char *host_charset_name = "auto";
201 static void
202 show_host_charset_name (struct ui_file *file, int from_tty,
203 			struct cmd_list_element *c,
204 			const char *value)
205 {
206   if (!strcmp (value, "auto"))
207     fprintf_filtered (file,
208 		      _("The host character set is \"auto; currently %s\".\n"),
209 		      auto_host_charset_name);
210   else
211     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
212 }
213 
/* The target charset setting, as returned by target_charset.  */
static const char *target_charset_name = GDB_DEFAULT_TARGET_CHARSET;

/* Show callback for the target charset: prints VALUE to FILE.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
			  struct cmd_list_element *c, const char *value)
{
  fprintf_filtered (file, _("The target character set is \"%s\".\n"),
		    value);
}
222 
/* The target wide charset setting.  target_wide_charset returns it
   when no endian-specific variant has been found.  */
static const char *target_wide_charset_name = GDB_DEFAULT_TARGET_WIDE_CHARSET;

/* Show callback for the target wide charset: prints VALUE to FILE.  */
static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
			       struct cmd_list_element *c, const char *value)
{
  fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
		    value);
}
231 
232 static const char *default_charset_names[] =
233 {
234   DEFAULT_CHARSET_NAMES
235   0
236 };
237 
238 static const char **charset_enum;
239 
240 
241 /* If the target wide character set has big- or little-endian
242    variants, these are the corresponding names.  */
243 static const char *target_wide_charset_be_name;
244 static const char *target_wide_charset_le_name;
245 
246 /* A helper function for validate which sets the target wide big- and
247    little-endian character set names, if possible.  */
248 
249 static void
250 set_be_le_names (void)
251 {
252   int i, len;
253 
254   target_wide_charset_le_name = NULL;
255   target_wide_charset_be_name = NULL;
256 
257   len = strlen (target_wide_charset_name);
258   for (i = 0; charset_enum[i]; ++i)
259     {
260       if (strncmp (target_wide_charset_name, charset_enum[i], len))
261 	continue;
262       if ((charset_enum[i][len] == 'B'
263 	   || charset_enum[i][len] == 'L')
264 	  && charset_enum[i][len + 1] == 'E'
265 	  && charset_enum[i][len + 2] == '\0')
266 	{
267 	  if (charset_enum[i][len] == 'B')
268 	    target_wide_charset_be_name = charset_enum[i];
269 	  else
270 	    target_wide_charset_le_name = charset_enum[i];
271 	}
272     }
273 }
274 
275 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
276    target-wide-charset', 'set charset' sfunc's.  */
277 
278 static void
279 validate (void)
280 {
281   iconv_t desc;
282   const char *host_cset = host_charset ();
283 
284   desc = iconv_open (target_wide_charset_name, host_cset);
285   if (desc == (iconv_t) -1)
286     error ("Cannot convert between character sets `%s' and `%s'",
287 	   target_wide_charset_name, host_cset);
288   iconv_close (desc);
289 
290   desc = iconv_open (target_charset_name, host_cset);
291   if (desc == (iconv_t) -1)
292     error ("Cannot convert between character sets `%s' and `%s'",
293 	   target_charset_name, host_cset);
294   iconv_close (desc);
295 
296   set_be_le_names ();
297 }
298 
/* This is the sfunc for the 'set charset' command.  That command is
   registered on host_charset_name only, so the target charset is
   copied from it here before the new pair is validated.  The
   arguments are unused.  */
static void
set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
{
  /* CAREFUL: set the target charset here as well. */
  target_charset_name = host_charset_name;
  validate ();
}
307 
/* 'set host-charset' command sfunc.  We need a wrapper here because
   the function needs to have a specific signature.  The arguments
   are unused; the work is done by validate.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
			struct cmd_list_element *c)
{
  validate ();
}
316 
/* Wrapper for the 'set target-charset' command.  The arguments are
   unused; the work is done by validate.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
			  struct cmd_list_element *c)
{
  validate ();
}
324 
/* Wrapper for the 'set target-wide-charset' command.  The arguments
   are unused; the work is done by validate, which also refreshes the
   endian-specific wide charset names.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
			       struct cmd_list_element *c)
{
  validate ();
}
332 
/* sfunc for the 'show charset' command.  Prints the host, target and
   target wide charset settings to FILE, in that order, by calling
   the three individual show callbacks.  NAME is unused; the current
   values of the three settings are printed instead.  */
static void
show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
	      const char *name)
{
  show_host_charset_name (file, from_tty, c, host_charset_name);
  show_target_charset_name (file, from_tty, c, target_charset_name);
  show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
}
342 
343 
344 /* Accessor functions.  */
345 
346 const char *
347 host_charset (void)
348 {
349   if (!strcmp (host_charset_name, "auto"))
350     return auto_host_charset_name;
351   return host_charset_name;
352 }
353 
/* Return the current target character set setting.  */

const char *
target_charset (void)
{
  return target_charset_name;
}
359 
360 const char *
361 target_wide_charset (enum bfd_endian byte_order)
362 {
363   if (byte_order == BFD_ENDIAN_BIG)
364     {
365       if (target_wide_charset_be_name)
366 	return target_wide_charset_be_name;
367     }
368   else
369     {
370       if (target_wide_charset_le_name)
371 	return target_wide_charset_le_name;
372     }
373 
374   return target_wide_charset_name;
375 }
376 
377 
378 /* Host character set management.  For the time being, we assume that
379    the host character set is some superset of ASCII.  */
380 
/* Map the host character C to the control character it names.  `?'
   maps to DEL (0x7f); any other character has bits 0x60 cleared.  */

char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0x7f : (c & 0x9f);
}
388 
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; anything else trips the assertion
   below.  Returns a value in the range 0..15.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions are only defined for EOF and values
     representable as unsigned char, so never hand them a plain char
     that might be negative.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
402 
403 
404 /* Public character management functions.  */
405 
406 /* A cleanup function which is run to close an iconv descriptor.  */
407 
408 static void
409 cleanup_iconv (void *p)
410 {
411   iconv_t *descp = p;
412   iconv_close (*descp);
413 }
414 
415 static size_t
416 convert_wchar (gdb_wchar_t **pinp, size_t *pinleft, char **poutp, size_t *poutleft)
417 {
418   char tmp[MB_CUR_MAX];
419   int r;
420 
421   while (*pinleft >= sizeof(gdb_wchar_t))
422     {
423       r = wctomb(tmp, **pinp);
424 
425       if (r == -1)
426 	perror_with_name ("Internal error while converting character sets");
427 
428       if (*poutleft < r)
429 	{
430 	  errno = E2BIG;
431 	  return (size_t) -1;
432 	}
433 
434       memcpy(*poutp, tmp, r);
435       *poutp += r;
436       *poutleft -= r;
437       ++*pinp;
438       *pinleft -= sizeof(gdb_wchar_t);
439     }
440 
441   if (*pinleft != 0)
442     return EINVAL;
443 
444   return 0;
445 }
446 
447 void
448 convert_between_encodings (const char *from, const char *to,
449 			   const gdb_byte *bytes, unsigned int num_bytes,
450 			   int width, struct obstack *output,
451 			   enum transliterations translit)
452 {
453   iconv_t desc;
454   struct cleanup *cleanups;
455   size_t inleft;
456   char *inp;
457   unsigned int space_request;
458   int use_wctomb = 0;
459 
460   /* Often, the host and target charsets will be the same.  */
461   if (!strcmp (from, to))
462     {
463       obstack_grow (output, bytes, num_bytes);
464       return;
465     }
466 
467   if (!strcmp (from, "wchar_t"))
468     {
469       if (strcmp (to, host_charset ()))
470 	perror_with_name ("Converting character sets");
471       cleanups = NULL;	/* silence gcc complaints */
472       use_wctomb = 1;
473     }
474   else
475     {
476       desc = iconv_open (to, from);
477       if (desc == (iconv_t) -1)
478 	perror_with_name ("Converting character sets");
479       cleanups = make_cleanup (cleanup_iconv, &desc);
480     }
481 
482   inleft = num_bytes;
483   inp = (char *) bytes;
484 
485   space_request = num_bytes;
486 
487   while (inleft > 0)
488     {
489       char *outp;
490       size_t outleft, r;
491       int old_size;
492 
493       old_size = obstack_object_size (output);
494       obstack_blank (output, space_request);
495 
496       outp = obstack_base (output) + old_size;
497       outleft = space_request;
498 
499       if (use_wctomb)
500 	r = convert_wchar((gdb_wchar_t **)(void *)&inp, &inleft, &outp, &outleft);
501       else
502 	r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
503 
504       /* Now make sure that the object on the obstack only includes
505 	 bytes we have converted.  */
506       obstack_blank (output, - (int) outleft);
507 
508       if (r == (size_t) -1)
509 	{
510 	  switch (errno)
511 	    {
512 	    case EILSEQ:
513 	      {
514 		int i;
515 
516 		/* Invalid input sequence.  */
517 		if (translit == translit_none)
518 		  error (_("Could not convert character to `%s' character set"),
519 			 to);
520 
521 		/* We emit escape sequence for the bytes, skip them,
522 		   and try again.  */
523 		for (i = 0; i < width; ++i)
524 		  {
525 		    char octal[5];
526 
527 		    sprintf (octal, "\\%.3o", *inp & 0xff);
528 		    obstack_grow_str (output, octal);
529 
530 		    ++inp;
531 		    --inleft;
532 		  }
533 	      }
534 	      break;
535 
536 	    case E2BIG:
537 	      /* We ran out of space in the output buffer.  Make it
538 		 bigger next time around.  */
539 	      space_request *= 2;
540 	      break;
541 
542 	    case EINVAL:
543 	      /* Incomplete input sequence.  FIXME: ought to report this
544 		 to the caller somehow.  */
545 	      inleft = 0;
546 	      break;
547 
548 	    default:
549 	      perror_with_name ("Internal error while converting character sets");
550 	    }
551 	}
552     }
553 
554   if (!use_wctomb)
555     do_cleanups (cleanups);
556 }
557 
558 
559 
/* An iterator that returns host wchar_t's from a target string.  */
struct wchar_iterator
{
  /* The underlying iconv descriptor.  */
  iconv_t desc;

  /* The input string.  This is updated as we convert characters.  */
  char *input;
  /* The number of bytes remaining in the input.  */
  size_t bytes;

  /* The width of an input character, in bytes.  This is how far the
     iterator skips when it meets an invalid sequence.  */
  size_t width;

  /* The intermediate buffer.  iconv writes host-charset bytes here;
     they are then turned into wide characters one at a time.
     INTER_SIZE is the allocated size and INTER_LEN the number of
     bytes currently held.  */
  char *inter;
  size_t inter_size;
  size_t inter_len;

  /* The most recently converted wide character.  */
  gdb_wchar_t out;
};
582 
583 /* Create a new iterator.  */
584 struct wchar_iterator *
585 make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
586 		     size_t width)
587 {
588   struct wchar_iterator *result;
589   iconv_t desc;
590 
591   desc = iconv_open (host_charset (), charset);
592   if (desc == (iconv_t) -1)
593     perror_with_name ("Converting character sets");
594 
595   result = XNEW (struct wchar_iterator);
596   result->desc = desc;
597   result->input = (char *) input;
598   result->bytes = bytes;
599   result->width = width;
600 
601   result->inter = XNEW (char);
602   result->inter_size = 1;
603   result->inter_len = 0;
604 
605   return result;
606 }
607 
/* Cleanup callback that destroys the wchar_iterator P: closes its
   iconv descriptor, then frees its intermediate buffer and the
   iterator itself.  */

static void
do_cleanup_iterator (void *p)
{
  struct wchar_iterator *iter = p;

  iconv_close (iter->desc);
  xfree (iter->inter);
  xfree (iter);
}
617 
/* Register a cleanup that destroys ITER, and return it.  */

struct cleanup *
make_cleanup_wchar_iterator (struct wchar_iterator *iter)
{
  return make_cleanup (do_cleanup_iterator, iter);
}
623 
624 int
625 wchar_iterate (struct wchar_iterator *iter,
626 	       enum wchar_iterate_result *out_result,
627 	       gdb_wchar_t **out_chars,
628 	       const gdb_byte **ptr,
629 	       size_t *len)
630 {
631   size_t out_request;
632   char *orig_inptr = iter->input;
633   size_t orig_in = iter->bytes;
634 
635   /* Try to convert some characters.  At first we try to convert just
636      a single character.  The reason for this is that iconv does not
637      necessarily update its outgoing arguments when it encounters an
638      invalid input sequence -- but we want to reliably report this to
639      our caller so it can emit an escape sequence.  */
640   while (iter->inter_len == 0 && iter->bytes > 0)
641     {
642       out_request = 1;
643       while (iter->bytes > 0)
644 	{
645 	  char *outptr = (char *) &iter->inter[iter->inter_len];
646 	  size_t out_avail = out_request;
647 
648 	  size_t r = iconv (iter->desc,
649 			    (ICONV_CONST char **) &iter->input, &iter->bytes,
650 			    &outptr, &out_avail);
651 	  if (r == (size_t) -1)
652 	    {
653 	      switch (errno)
654 		{
655 		case EILSEQ:
656 		  /* Invalid input sequence.  Skip it, and let the caller
657 		     know about it.  */
658 		  *out_result = wchar_iterate_invalid;
659 		  *ptr = iter->input;
660 		  *len = iter->width;
661 		  iter->input += iter->width;
662 		  iter->bytes -= iter->width;
663 		  return 0;
664 
665 		case E2BIG:
666 		  /* We ran out of space.  We still might have converted a
667 		     character; if so, return it.  Otherwise, grow the
668 		     buffer and try again.  */
669 		  if (out_avail < out_request)
670 		    break;
671 
672 		  ++out_request;
673 		  if (out_request > iter->inter_size)
674 		    {
675 		      iter->inter_size = out_request;
676 		      iter->inter = xrealloc (iter->inter, out_request);
677 		    }
678 		  continue;
679 
680 		case EINVAL:
681 		  /* Incomplete input sequence.  Let the caller know, and
682 		     arrange for future calls to see EOF.  */
683 		  *out_result = wchar_iterate_incomplete;
684 		  *ptr = iter->input;
685 		  *len = iter->bytes;
686 		  iter->bytes = 0;
687 		  return 0;
688 
689 		default:
690 		  perror_with_name ("Internal error while converting character sets");
691 		}
692 	    }
693 
694 	  /* We converted something.  */
695 	  iter->inter_len += out_request - out_avail;
696 	  break;
697 	}
698     }
699 
700   if (iter->inter_len > 0)
701     {
702       int r;
703 
704       /* Now convert from our charset to wchar_t */
705       r = mbtowc(&iter->out, &iter->inter[0], iter->inter_len);
706 
707       /* This must never happen: we just converted to a valid charset! */
708       if (r < 0)
709 	perror_with_name ("Internal error while converting character sets");
710 
711       /* NUL bytes are alright */
712       if (r == 0)
713 	  r = 1;
714 
715       iter->inter_len -= r;
716       memmove(&iter->inter[0], &iter->inter[r], iter->inter_len);
717 
718       *out_result = wchar_iterate_ok;
719       *out_chars = &iter->out;
720       *ptr = orig_inptr;
721       *len = orig_in - iter->bytes;
722       return 1;
723     }
724 
725   /* Really done.  */
726   *out_result = wchar_iterate_eof;
727   return -1;
728 }
729 
730 
731 /* The charset.c module initialization function.  */
732 
extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */

/* Element type for the vector of charset names below.  */
typedef char *char_ptr;
DEF_VEC_P (char_ptr);

/* The charset names collected at startup.  _initialize_charset
   pushes "auto" first and find_charset_names appends the rest,
   ending with a NULL entry on success.  */
static VEC (char_ptr) *charsets;
739 
740 #ifdef PHONY_ICONV
741 
/* Phony iconv variant: the only charset on offer is the default host
   charset.  Appends it, followed by the terminating NULL entry, to
   `charsets'.  */

static void
find_charset_names (void)
{
  VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
  VEC_safe_push (char_ptr, charsets, NULL);
}
748 
749 #else /* PHONY_ICONV */
750 
751 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
752    provides different symbols in the static and dynamic libraries.
753    So, configure may see libiconvlist but not iconvlist.  But, calling
754    iconvlist is the right thing to do and will work.  Hence we do a
755    check here but unconditionally call iconvlist below.  */
756 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
757 
758 /* A helper function that adds some character sets to the vector of
759    all character sets.  This is a callback function for iconvlist.  */
760 
761 static int
762 add_one (unsigned int count, const char *const *names, void *data)
763 {
764   unsigned int i;
765 
766   for (i = 0; i < count; ++i)
767     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
768 
769   return 0;
770 }
771 
/* iconvlist variant: let iconvlist hand every charset name to
   add_one, then append the terminating NULL entry to `charsets'.  */

static void
find_charset_names (void)
{
  iconvlist (add_one, NULL);
  VEC_safe_push (char_ptr, charsets, NULL);
}
778 
779 #else
780 
781 static void
782 find_charset_names (void)
783 {
784   struct pex_obj *child;
785   char *args[3];
786   int err, status;
787   int fail = 1;
788 
789   child = pex_init (0, "iconv", NULL);
790 
791   args[0] = "iconv";
792   args[1] = "-l";
793   args[2] = NULL;
794   /* Note that we simply ignore errors here.  */
795   if (!pex_run (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT, "iconv",
796 		args, NULL, NULL, &err))
797     {
798       FILE *in = pex_read_output (child, 0);
799 
800       /* POSIX says that iconv -l uses an unspecified format.  We
801 	 parse the glibc and libiconv formats; feel free to add others
802 	 as needed.  */
803       while (!feof (in))
804 	{
805 	  /* The size of buf is chosen arbitrarily.  */
806 	  char buf[1024];
807 	  char *start, *r;
808 	  int len, keep_going;
809 
810 	  r = fgets (buf, sizeof (buf), in);
811 	  if (!r)
812 	    break;
813 	  len = strlen (r);
814 	  if (len <= 3)
815 	    continue;
816 	  /* Strip off the newline.  */
817 	  --len;
818 	  /* Strip off one or two '/'s.  glibc will print lines like
819 	     "8859_7//", but also "10646-1:1993/UCS4/".  */
820 	  if (buf[len - 1] == '/')
821 	    --len;
822 	  if (buf[len - 1] == '/')
823 	    --len;
824 	  buf[len] = '\0';
825 
826 	  /* libiconv will print multiple entries per line, separated
827 	     by spaces.  */
828 	  start = buf;
829 	  while (1)
830 	    {
831 	      int keep_going;
832 	      char *p;
833 
834 	      /* Find the next space, or end-of-line.  */
835 	      for (p = start; *p && *p != ' '; ++p)
836 		;
837 	      /* Ignore an empty result.  */
838 	      if (p == start)
839 		break;
840 	      keep_going = *p;
841 	      *p = '\0';
842 	      VEC_safe_push (char_ptr, charsets, xstrdup (start));
843 	      if (!keep_going)
844 		break;
845 	      /* Skip any extra spaces.  */
846 	      for (start = p + 1; *start && *start == ' '; ++start)
847 		;
848 	    }
849 	}
850 
851       if (pex_get_status (child, 1, &status)
852 	  && WIFEXITED (status) && !WEXITSTATUS (status))
853 	fail = 0;
854 
855     }
856 
857   pex_free (child);
858 
859   if (fail)
860     {
861       /* Some error occurred, so drop the vector.  */
862       int ix;
863       char *elt;
864       for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
865 	xfree (elt);
866       VEC_truncate (char_ptr, charsets, 0);
867     }
868   else
869     VEC_safe_push (char_ptr, charsets, NULL);
870 }
871 
872 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
873 #endif /* PHONY_ICONV */
874 
875 void
876 _initialize_charset (void)
877 {
878   struct cmd_list_element *new_cmd;
879 
880   /* The first element is always "auto"; then we skip it for the
881      commands where it is not allowed.  */
882   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
883   find_charset_names ();
884 
885   if (VEC_length (char_ptr, charsets) > 1)
886     charset_enum = (const char **) VEC_address (char_ptr, charsets);
887   else
888     charset_enum = default_charset_names;
889 
890 #ifndef PHONY_ICONV
891 #ifdef HAVE_LANGINFO_CODESET
892   auto_host_charset_name = nl_langinfo (CODESET);
893   /* Solaris will return `646' here -- but the Solaris iconv then
894      does not accept this.  */
895   if (!strcmp (auto_host_charset_name, "646"))
896     auto_host_charset_name = "ASCII";
897   target_charset_name = auto_host_charset_name;
898 
899   set_be_le_names ();
900 #endif
901 #endif
902 
903   add_setshow_enum_cmd ("charset", class_support,
904 			&charset_enum[1], &host_charset_name, _("\
905 Set the host and target character sets."), _("\
906 Show the host and target character sets."), _("\
907 The `host character set' is the one used by the system GDB is running on.\n\
908 The `target character set' is the one used by the program being debugged.\n\
909 You may only use supersets of ASCII for your host character set; GDB does\n\
910 not support any others.\n\
911 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
912 			/* Note that the sfunc below needs to set
913 			   target_charset_name, because the 'set
914 			   charset' command sets two variables.  */
915 			set_charset_sfunc,
916 			show_charset,
917 			&setlist, &showlist);
918 
919   add_setshow_enum_cmd ("host-charset", class_support,
920 			charset_enum, &host_charset_name, _("\
921 Set the host character set."), _("\
922 Show the host character set."), _("\
923 The `host character set' is the one used by the system GDB is running on.\n\
924 You may only use supersets of ASCII for your host character set; GDB does\n\
925 not support any others.\n\
926 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
927 			set_host_charset_sfunc,
928 			show_host_charset_name,
929 			&setlist, &showlist);
930 
931   add_setshow_enum_cmd ("target-charset", class_support,
932 			&charset_enum[1], &target_charset_name, _("\
933 Set the target character set."), _("\
934 Show the target character set."), _("\
935 The `target character set' is the one used by the program being debugged.\n\
936 GDB translates characters and strings between the host and target\n\
937 character sets as needed.\n\
938 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
939 			set_target_charset_sfunc,
940 			show_target_charset_name,
941 			&setlist, &showlist);
942 
943   add_setshow_enum_cmd ("target-wide-charset", class_support,
944 			&charset_enum[1], &target_wide_charset_name,
945 			_("\
946 Set the target wide character set."), _("\
947 Show the target wide character set."), _("\
948 The `target wide character set' is the one used by the program being debugged.\n\
949 In particular it is the encoding used by `wchar_t'.\n\
950 GDB translates characters and strings between the host and target\n\
951 character sets as needed.\n\
952 To see a list of the character sets GDB supports, type\n\
953 `set target-wide-charset'<TAB>"),
954 			set_target_wide_charset_sfunc,
955 			show_target_wide_charset_name,
956 			&setlist, &showlist);
957 }
958