1 /* Character set conversion support for GDB. 2 3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010, 2011 4 Free Software Foundation, Inc. 5 6 This file is part of GDB. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 20 21 #include "defs.h" 22 #include "charset.h" 23 #include "gdbcmd.h" 24 #include "gdb_assert.h" 25 #include "gdb_obstack.h" 26 #include "gdb_wait.h" 27 #include "charset-list.h" 28 #include "vec.h" 29 #include "environ.h" 30 #include "arch-utils.h" 31 32 #include <stddef.h> 33 #include "gdb_string.h" 34 #include <ctype.h> 35 36 #ifdef USE_WIN32API 37 #include <windows.h> 38 #endif 39 40 /* How GDB's character set support works 41 42 GDB has three global settings: 43 44 - The `current host character set' is the character set GDB should 45 use in talking to the user, and which (hopefully) the user's 46 terminal knows how to display properly. Most users should not 47 change this. 48 49 - The `current target character set' is the character set the 50 program being debugged uses. 51 52 - The `current target wide character set' is the wide character set 53 the program being debugged uses, that is, the encoding used for 54 wchar_t. 55 56 There are commands to set each of these, and mechanisms for 57 choosing reasonable default values. GDB has a global list of 58 character sets that it can use as its host or target character 59 sets. 60 61 The header file `charset.h' declares various functions that 62 different pieces of GDB need to perform tasks like: 63 64 - printing target strings and characters to the user's terminal 65 (mostly target->host conversions), 66 67 - building target-appropriate representations of strings and 68 characters the user enters in expressions (mostly host->target 69 conversions), 70 71 and so on. 72 73 To avoid excessive code duplication and maintenance efforts, 74 GDB simply requires a capable iconv function. Users on platforms 75 without a suitable iconv can use the GNU iconv library. */ 76 77 78 #ifdef PHONY_ICONV 79 80 /* Provide a phony iconv that does as little as possible. Also, 81 arrange for there to be a single available character set. */ 82 83 #undef GDB_DEFAULT_HOST_CHARSET 84 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" 85 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 86 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" 87 #undef DEFAULT_CHARSET_NAMES 88 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , 89 90 #undef iconv_t 91 #define iconv_t int 92 #undef iconv_open 93 #define iconv_open phony_iconv_open 94 #undef iconv 95 #define iconv phony_iconv 96 #undef iconv_close 97 #define iconv_close phony_iconv_close 98 99 #undef ICONV_CONST 100 #define ICONV_CONST const 101 102 /* Some systems don't have EILSEQ, so we define it here, but not as 103 EINVAL, because callers of `iconv' want to distinguish EINVAL and 104 EILSEQ. This is what iconv.h from libiconv does as well. Note 105 that wchar.h may also define EILSEQ, so this needs to be after we 106 include wchar.h, which happens in defs.h through gdb_wchar.h. */ 107 #ifndef EILSEQ 108 #define EILSEQ ENOENT 109 #endif 110 111 iconv_t 112 phony_iconv_open (const char *to, const char *from) 113 { 114 /* We allow conversions from UTF-32BE, wchar_t, and the host charset. 115 We allow conversions to wchar_t and the host charset. */ 116 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") 117 && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) 118 return -1; 119 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) 120 return -1; 121 122 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is 123 used as a flag in calls to iconv. */ 124 return !strcmp (from, "UTF-32BE"); 125 } 126 127 int 128 phony_iconv_close (iconv_t arg) 129 { 130 return 0; 131 } 132 133 size_t 134 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, 135 char **outbuf, size_t *outbytesleft) 136 { 137 if (utf_flag) 138 { 139 while (*inbytesleft >= 4) 140 { 141 size_t j; 142 unsigned long c = 0; 143 144 for (j = 0; j < 4; ++j) 145 { 146 c <<= 8; 147 c += (*inbuf)[j] & 0xff; 148 } 149 150 if (c >= 256) 151 { 152 errno = EILSEQ; 153 return -1; 154 } 155 **outbuf = c & 0xff; 156 ++*outbuf; 157 --*outbytesleft; 158 159 ++*inbuf; 160 *inbytesleft -= 4; 161 } 162 if (*inbytesleft < 4) 163 { 164 errno = EINVAL; 165 return -1; 166 } 167 } 168 else 169 { 170 /* In all other cases we simply copy input bytes to the 171 output. */ 172 size_t amt = *inbytesleft; 173 174 if (amt > *outbytesleft) 175 amt = *outbytesleft; 176 memcpy (*outbuf, *inbuf, amt); 177 *inbuf += amt; 178 *outbuf += amt; 179 *inbytesleft -= amt; 180 *outbytesleft -= amt; 181 } 182 183 if (*inbytesleft) 184 { 185 errno = E2BIG; 186 return -1; 187 } 188 189 /* The number of non-reversible conversions -- but they were all 190 reversible. */ 191 return 0; 192 } 193 194 #endif 195 196 197 198 /* The global lists of character sets and translations. */ 199 200 201 #ifndef GDB_DEFAULT_TARGET_CHARSET 202 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 203 #endif 204 205 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET 206 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" 207 #endif 208 209 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; 210 static const char *host_charset_name = "auto"; 211 static void 212 show_host_charset_name (struct ui_file *file, int from_tty, 213 struct cmd_list_element *c, 214 const char *value) 215 { 216 if (!strcmp (value, "auto")) 217 fprintf_filtered (file, 218 _("The host character set is \"auto; currently %s\".\n"), 219 auto_host_charset_name); 220 else 221 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); 222 } 223 224 static const char *target_charset_name = "auto"; 225 static void 226 show_target_charset_name (struct ui_file *file, int from_tty, 227 struct cmd_list_element *c, const char *value) 228 { 229 if (!strcmp (value, "auto")) 230 fprintf_filtered (file, 231 _("The target character set is \"auto; " 232 "currently %s\".\n"), 233 gdbarch_auto_charset (get_current_arch ())); 234 else 235 fprintf_filtered (file, _("The target character set is \"%s\".\n"), 236 value); 237 } 238 239 static const char *target_wide_charset_name = "auto"; 240 static void 241 show_target_wide_charset_name (struct ui_file *file, 242 int from_tty, 243 struct cmd_list_element *c, 244 const char *value) 245 { 246 if (!strcmp (value, "auto")) 247 fprintf_filtered (file, 248 _("The target wide character set is \"auto; " 249 "currently %s\".\n"), 250 gdbarch_auto_wide_charset (get_current_arch ())); 251 else 252 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), 253 value); 254 } 255 256 static const char *default_charset_names[] = 257 { 258 DEFAULT_CHARSET_NAMES 259 0 260 }; 261 262 static const char **charset_enum; 263 264 265 /* If the target wide character set has big- or little-endian 266 variants, these are the corresponding names. */ 267 static const char *target_wide_charset_be_name; 268 static const char *target_wide_charset_le_name; 269 270 /* The architecture for which the BE- and LE-names are valid. */ 271 static struct gdbarch *be_le_arch; 272 273 /* A helper function which sets the target wide big- and little-endian 274 character set names, if possible. */ 275 276 static void 277 set_be_le_names (struct gdbarch *gdbarch) 278 { 279 int i, len; 280 const char *target_wide; 281 282 if (be_le_arch == gdbarch) 283 return; 284 be_le_arch = gdbarch; 285 286 target_wide_charset_le_name = NULL; 287 target_wide_charset_be_name = NULL; 288 289 target_wide = target_wide_charset_name; 290 if (!strcmp (target_wide, "auto")) 291 target_wide = gdbarch_auto_wide_charset (gdbarch); 292 293 len = strlen (target_wide); 294 for (i = 0; charset_enum[i]; ++i) 295 { 296 if (strncmp (target_wide, charset_enum[i], len)) 297 continue; 298 if ((charset_enum[i][len] == 'B' 299 || charset_enum[i][len] == 'L') 300 && charset_enum[i][len + 1] == 'E' 301 && charset_enum[i][len + 2] == '\0') 302 { 303 if (charset_enum[i][len] == 'B') 304 target_wide_charset_be_name = charset_enum[i]; 305 else 306 target_wide_charset_le_name = charset_enum[i]; 307 } 308 } 309 } 310 311 /* 'Set charset', 'set host-charset', 'set target-charset', 'set 312 target-wide-charset', 'set charset' sfunc's. */ 313 314 static void 315 validate (struct gdbarch *gdbarch) 316 { 317 iconv_t desc; 318 const char *host_cset = host_charset (); 319 const char *target_cset = target_charset (gdbarch); 320 const char *target_wide_cset = target_wide_charset_name; 321 322 if (!strcmp (target_wide_cset, "auto")) 323 target_wide_cset = gdbarch_auto_wide_charset (gdbarch); 324 325 desc = iconv_open (target_wide_cset, host_cset); 326 if (desc == (iconv_t) -1) 327 error (_("Cannot convert between character sets `%s' and `%s'"), 328 target_wide_cset, host_cset); 329 iconv_close (desc); 330 331 desc = iconv_open (target_cset, host_cset); 332 if (desc == (iconv_t) -1) 333 error (_("Cannot convert between character sets `%s' and `%s'"), 334 target_cset, host_cset); 335 iconv_close (desc); 336 337 /* Clear the cache. */ 338 be_le_arch = NULL; 339 } 340 341 /* This is the sfunc for the 'set charset' command. */ 342 static void 343 set_charset_sfunc (char *charset, int from_tty, 344 struct cmd_list_element *c) 345 { 346 /* CAREFUL: set the target charset here as well. */ 347 target_charset_name = host_charset_name; 348 validate (get_current_arch ()); 349 } 350 351 /* 'set host-charset' command sfunc. We need a wrapper here because 352 the function needs to have a specific signature. */ 353 static void 354 set_host_charset_sfunc (char *charset, int from_tty, 355 struct cmd_list_element *c) 356 { 357 validate (get_current_arch ()); 358 } 359 360 /* Wrapper for the 'set target-charset' command. */ 361 static void 362 set_target_charset_sfunc (char *charset, int from_tty, 363 struct cmd_list_element *c) 364 { 365 validate (get_current_arch ()); 366 } 367 368 /* Wrapper for the 'set target-wide-charset' command. */ 369 static void 370 set_target_wide_charset_sfunc (char *charset, int from_tty, 371 struct cmd_list_element *c) 372 { 373 validate (get_current_arch ()); 374 } 375 376 /* sfunc for the 'show charset' command. */ 377 static void 378 show_charset (struct ui_file *file, int from_tty, 379 struct cmd_list_element *c, 380 const char *name) 381 { 382 show_host_charset_name (file, from_tty, c, host_charset_name); 383 show_target_charset_name (file, from_tty, c, target_charset_name); 384 show_target_wide_charset_name (file, from_tty, c, 385 target_wide_charset_name); 386 } 387 388 389 /* Accessor functions. */ 390 391 const char * 392 host_charset (void) 393 { 394 if (!strcmp (host_charset_name, "auto")) 395 return auto_host_charset_name; 396 return host_charset_name; 397 } 398 399 const char * 400 target_charset (struct gdbarch *gdbarch) 401 { 402 if (!strcmp (target_charset_name, "auto")) 403 return gdbarch_auto_charset (gdbarch); 404 return target_charset_name; 405 } 406 407 const char * 408 target_wide_charset (struct gdbarch *gdbarch) 409 { 410 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); 411 412 set_be_le_names (gdbarch); 413 if (byte_order == BFD_ENDIAN_BIG) 414 { 415 if (target_wide_charset_be_name) 416 return target_wide_charset_be_name; 417 } 418 else 419 { 420 if (target_wide_charset_le_name) 421 return target_wide_charset_le_name; 422 } 423 424 if (!strcmp (target_wide_charset_name, "auto")) 425 return gdbarch_auto_wide_charset (gdbarch); 426 427 return target_wide_charset_name; 428 } 429 430 431 /* Host character set management. For the time being, we assume that 432 the host character set is some superset of ASCII. */ 433 434 char 435 host_letter_to_control_character (char c) 436 { 437 if (c == '?') 438 return 0177; 439 return c & 0237; 440 } 441 442 /* Convert a host character, C, to its hex value. C must already have 443 been validated using isxdigit. */ 444 445 int 446 host_hex_value (char c) 447 { 448 if (isdigit (c)) 449 return c - '0'; 450 if (c >= 'a' && c <= 'f') 451 return 10 + c - 'a'; 452 gdb_assert (c >= 'A' && c <= 'F'); 453 return 10 + c - 'A'; 454 } 455 456 457 /* Public character management functions. */ 458 459 /* A cleanup function which is run to close an iconv descriptor. */ 460 461 static void 462 cleanup_iconv (void *p) 463 { 464 iconv_t *descp = p; 465 iconv_close (*descp); 466 } 467 468 static size_t 469 convert_wchar (gdb_wchar_t **pinp, size_t *pinleft, char **poutp, size_t *poutleft) 470 { 471 char tmp[MB_CUR_MAX]; 472 int r; 473 474 while (*pinleft >= sizeof(gdb_wchar_t)) 475 { 476 r = wctomb(tmp, **pinp); 477 478 if (r == -1) 479 perror_with_name ("Internal error while converting character sets"); 480 481 if (*poutleft < r) 482 { 483 errno = E2BIG; 484 return (size_t) -1; 485 } 486 487 memcpy(*poutp, tmp, r); 488 *poutp += r; 489 *poutleft -= r; 490 ++*pinp; 491 *pinleft -= sizeof(gdb_wchar_t); 492 } 493 494 if (*pinleft != 0) 495 return EINVAL; 496 497 return 0; 498 } 499 500 void 501 convert_between_encodings (const char *from, const char *to, 502 const gdb_byte *bytes, unsigned int num_bytes, 503 int width, struct obstack *output, 504 enum transliterations translit) 505 { 506 iconv_t desc; 507 struct cleanup *cleanups; 508 size_t inleft; 509 char *inp; 510 unsigned int space_request; 511 int use_wctomb = 0; 512 513 /* Often, the host and target charsets will be the same. */ 514 if (!strcmp (from, to)) 515 { 516 obstack_grow (output, bytes, num_bytes); 517 return; 518 } 519 520 if (!strcmp (from, "wchar_t")) 521 { 522 if (strcmp (to, host_charset ())) 523 perror_with_name (_("Converting character sets")); 524 cleanups = NULL; /* silence gcc complaints */ 525 use_wctomb = 1; 526 } 527 else 528 { 529 desc = iconv_open (to, from); 530 if (desc == (iconv_t) -1) 531 perror_with_name (_("Converting character sets")); 532 cleanups = make_cleanup (cleanup_iconv, &desc); 533 } 534 535 inleft = num_bytes; 536 inp = (char *) bytes; 537 538 space_request = num_bytes; 539 540 while (inleft > 0) 541 { 542 char *outp; 543 size_t outleft, r; 544 int old_size; 545 546 old_size = obstack_object_size (output); 547 obstack_blank (output, space_request); 548 549 outp = obstack_base (output) + old_size; 550 outleft = space_request; 551 552 if (use_wctomb) 553 r = convert_wchar((gdb_wchar_t **)(void *)&inp, &inleft, &outp, &outleft); 554 else 555 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft); 556 557 /* Now make sure that the object on the obstack only includes 558 bytes we have converted. */ 559 obstack_blank (output, - (int) outleft); 560 561 if (r == (size_t) -1) 562 { 563 switch (errno) 564 { 565 case EILSEQ: 566 { 567 int i; 568 569 /* Invalid input sequence. */ 570 if (translit == translit_none) 571 error (_("Could not convert character " 572 "to `%s' character set"), to); 573 574 /* We emit escape sequence for the bytes, skip them, 575 and try again. */ 576 for (i = 0; i < width; ++i) 577 { 578 char octal[5]; 579 580 sprintf (octal, "\\%.3o", *inp & 0xff); 581 obstack_grow_str (output, octal); 582 583 ++inp; 584 --inleft; 585 } 586 } 587 break; 588 589 case E2BIG: 590 /* We ran out of space in the output buffer. Make it 591 bigger next time around. */ 592 space_request *= 2; 593 break; 594 595 case EINVAL: 596 /* Incomplete input sequence. FIXME: ought to report this 597 to the caller somehow. */ 598 inleft = 0; 599 break; 600 601 default: 602 perror_with_name (_("Internal error while " 603 "converting character sets")); 604 } 605 } 606 } 607 608 if (!use_wctomb) 609 do_cleanups (cleanups); 610 } 611 612 613 614 /* An iterator that returns host wchar_t's from a target string. */ 615 struct wchar_iterator 616 { 617 /* The underlying iconv descriptor. */ 618 iconv_t desc; 619 620 /* The input string. This is updated as convert characters. */ 621 char *input; 622 /* The number of bytes remaining in the input. */ 623 size_t bytes; 624 625 /* The width of an input character. */ 626 size_t width; 627 628 /* The intermediate buffer */ 629 char *inter; 630 size_t inter_size; 631 size_t inter_len; 632 633 /* The output byte. */ 634 gdb_wchar_t out; 635 }; 636 637 /* Create a new iterator. */ 638 struct wchar_iterator * 639 make_wchar_iterator (const gdb_byte *input, size_t bytes, 640 const char *charset, size_t width) 641 { 642 struct wchar_iterator *result; 643 iconv_t desc; 644 645 desc = iconv_open (host_charset (), charset); 646 if (desc == (iconv_t) -1) 647 perror_with_name (_("Converting character sets")); 648 649 result = XNEW (struct wchar_iterator); 650 result->desc = desc; 651 result->input = (char *) input; 652 result->bytes = bytes; 653 result->width = width; 654 655 result->inter = XNEW (char); 656 result->inter_size = 1; 657 result->inter_len = 0; 658 659 return result; 660 } 661 662 static void 663 do_cleanup_iterator (void *p) 664 { 665 struct wchar_iterator *iter = p; 666 667 iconv_close (iter->desc); 668 xfree (iter->inter); 669 xfree (iter); 670 } 671 672 struct cleanup * 673 make_cleanup_wchar_iterator (struct wchar_iterator *iter) 674 { 675 return make_cleanup (do_cleanup_iterator, iter); 676 } 677 678 int 679 wchar_iterate (struct wchar_iterator *iter, 680 enum wchar_iterate_result *out_result, 681 gdb_wchar_t **out_chars, 682 const gdb_byte **ptr, 683 size_t *len) 684 { 685 size_t out_request; 686 char *orig_inptr = iter->input; 687 size_t orig_in = iter->bytes; 688 689 /* Try to convert some characters. At first we try to convert just 690 a single character. The reason for this is that iconv does not 691 necessarily update its outgoing arguments when it encounters an 692 invalid input sequence -- but we want to reliably report this to 693 our caller so it can emit an escape sequence. */ 694 while (iter->inter_len == 0 && iter->bytes > 0) 695 { 696 out_request = 1; 697 while (iter->bytes > 0) 698 { 699 char *outptr = (char *) &iter->inter[iter->inter_len]; 700 size_t out_avail = out_request; 701 702 size_t r = iconv (iter->desc, 703 (ICONV_CONST char **) &iter->input, &iter->bytes, 704 &outptr, &out_avail); 705 if (r == (size_t) -1) 706 { 707 switch (errno) 708 { 709 case EILSEQ: 710 /* Invalid input sequence. Skip it, and let the caller 711 know about it. */ 712 *out_result = wchar_iterate_invalid; 713 *ptr = iter->input; 714 *len = iter->width; 715 iter->input += iter->width; 716 iter->bytes -= iter->width; 717 return 0; 718 719 case E2BIG: 720 /* We ran out of space. We still might have converted a 721 character; if so, return it. Otherwise, grow the 722 buffer and try again. */ 723 if (out_avail < out_request) 724 break; 725 726 ++out_request; 727 if (out_request > iter->inter_size) 728 { 729 iter->inter_size = out_request; 730 iter->inter = xrealloc (iter->inter, out_request); 731 } 732 continue; 733 734 case EINVAL: 735 /* Incomplete input sequence. Let the caller know, and 736 arrange for future calls to see EOF. */ 737 *out_result = wchar_iterate_incomplete; 738 *ptr = iter->input; 739 *len = iter->bytes; 740 iter->bytes = 0; 741 return 0; 742 743 default: 744 perror_with_name (_("Internal error while " 745 "converting character sets")); 746 } 747 } 748 749 /* We converted something. */ 750 iter->inter_len += out_request - out_avail; 751 break; 752 } 753 } 754 755 if (iter->inter_len > 0) 756 { 757 int r; 758 759 /* Now convert from our charset to wchar_t */ 760 r = mbtowc(&iter->out, &iter->inter[0], iter->inter_len); 761 762 /* This must never happen: we just converted to a valid charset! */ 763 if (r < 0) 764 perror_with_name (_("Internal error while " 765 "converting character sets")); 766 767 /* NUL bytes are alright */ 768 if (r == 0) 769 r = 1; 770 771 iter->inter_len -= r; 772 memmove(&iter->inter[0], &iter->inter[r], iter->inter_len); 773 774 *out_result = wchar_iterate_ok; 775 *out_chars = &iter->out; 776 *ptr = orig_inptr; 777 *len = orig_in - iter->bytes; 778 return 1; 779 } 780 781 /* Really done. */ 782 *out_result = wchar_iterate_eof; 783 return -1; 784 } 785 786 787 /* The charset.c module initialization function. */ 788 789 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ 790 791 DEF_VEC_P (char_ptr); 792 793 static VEC (char_ptr) *charsets; 794 795 #ifdef PHONY_ICONV 796 797 static void 798 find_charset_names (void) 799 { 800 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); 801 VEC_safe_push (char_ptr, charsets, NULL); 802 } 803 804 #else /* PHONY_ICONV */ 805 806 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but 807 provides different symbols in the static and dynamic libraries. 808 So, configure may see libiconvlist but not iconvlist. But, calling 809 iconvlist is the right thing to do and will work. Hence we do a 810 check here but unconditionally call iconvlist below. */ 811 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) 812 813 /* A helper function that adds some character sets to the vector of 814 all character sets. This is a callback function for iconvlist. */ 815 816 static int 817 add_one (unsigned int count, const char *const *names, void *data) 818 { 819 unsigned int i; 820 821 for (i = 0; i < count; ++i) 822 VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); 823 824 return 0; 825 } 826 827 static void 828 find_charset_names (void) 829 { 830 iconvlist (add_one, NULL); 831 VEC_safe_push (char_ptr, charsets, NULL); 832 } 833 834 #else 835 836 /* Return non-zero if LINE (output from iconv) should be ignored. 837 Older iconv programs (e.g. 2.2.2) include the human readable 838 introduction even when stdout is not a tty. Newer versions omit 839 the intro if stdout is not a tty. */ 840 841 static int 842 ignore_line_p (const char *line) 843 { 844 /* This table is used to filter the output. If this text appears 845 anywhere in the line, it is ignored (strstr is used). */ 846 static const char * const ignore_lines[] = 847 { 848 "The following", 849 "not necessarily", 850 "the FROM and TO", 851 "listed with several", 852 NULL 853 }; 854 int i; 855 856 for (i = 0; ignore_lines[i] != NULL; ++i) 857 { 858 if (strstr (line, ignore_lines[i]) != NULL) 859 return 1; 860 } 861 862 return 0; 863 } 864 865 static void 866 find_charset_names (void) 867 { 868 struct pex_obj *child; 869 char *args[3]; 870 int err, status; 871 int fail = 1; 872 struct gdb_environ *iconv_env; 873 874 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is 875 not a tty. We need to recognize it and ignore it. This text is 876 subject to translation, so force LANGUAGE=C. */ 877 iconv_env = make_environ (); 878 init_environ (iconv_env); 879 set_in_environ (iconv_env, "LANGUAGE", "C"); 880 set_in_environ (iconv_env, "LC_ALL", "C"); 881 882 child = pex_init (PEX_USE_PIPES, "iconv", NULL); 883 884 args[0] = "iconv"; 885 args[1] = "-l"; 886 args[2] = NULL; 887 /* Note that we simply ignore errors here. */ 888 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT, 889 "iconv", args, environ_vector (iconv_env), 890 NULL, NULL, &err)) 891 { 892 FILE *in = pex_read_output (child, 0); 893 894 /* POSIX says that iconv -l uses an unspecified format. We 895 parse the glibc and libiconv formats; feel free to add others 896 as needed. */ 897 898 while (!feof (in)) 899 { 900 /* The size of buf is chosen arbitrarily. */ 901 char buf[1024]; 902 char *start, *r; 903 int len; 904 905 r = fgets (buf, sizeof (buf), in); 906 if (!r) 907 break; 908 len = strlen (r); 909 if (len <= 3) 910 continue; 911 if (ignore_line_p (r)) 912 continue; 913 914 /* Strip off the newline. */ 915 --len; 916 /* Strip off one or two '/'s. glibc will print lines like 917 "8859_7//", but also "10646-1:1993/UCS4/". */ 918 if (buf[len - 1] == '/') 919 --len; 920 if (buf[len - 1] == '/') 921 --len; 922 buf[len] = '\0'; 923 924 /* libiconv will print multiple entries per line, separated 925 by spaces. Older iconvs will print multiple entries per 926 line, indented by two spaces, and separated by ", " 927 (i.e. the human readable form). */ 928 start = buf; 929 while (1) 930 { 931 int keep_going; 932 char *p; 933 934 /* Skip leading blanks. */ 935 for (p = start; *p && *p == ' '; ++p) 936 ; 937 start = p; 938 /* Find the next space, comma, or end-of-line. */ 939 for ( ; *p && *p != ' ' && *p != ','; ++p) 940 ; 941 /* Ignore an empty result. */ 942 if (p == start) 943 break; 944 keep_going = *p; 945 *p = '\0'; 946 VEC_safe_push (char_ptr, charsets, xstrdup (start)); 947 if (!keep_going) 948 break; 949 /* Skip any extra spaces. */ 950 for (start = p + 1; *start && *start == ' '; ++start) 951 ; 952 } 953 } 954 955 if (pex_get_status (child, 1, &status) 956 && WIFEXITED (status) && !WEXITSTATUS (status)) 957 fail = 0; 958 959 } 960 961 pex_free (child); 962 free_environ (iconv_env); 963 964 if (fail) 965 { 966 /* Some error occurred, so drop the vector. */ 967 int ix; 968 char *elt; 969 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix) 970 xfree (elt); 971 VEC_truncate (char_ptr, charsets, 0); 972 } 973 else 974 VEC_safe_push (char_ptr, charsets, NULL); 975 } 976 977 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ 978 #endif /* PHONY_ICONV */ 979 980 /* The "auto" target charset used by default_auto_charset. */ 981 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; 982 983 const char * 984 default_auto_charset (void) 985 { 986 return auto_target_charset_name; 987 } 988 989 const char * 990 default_auto_wide_charset (void) 991 { 992 return GDB_DEFAULT_TARGET_WIDE_CHARSET; 993 } 994 995 996 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION 997 /* Macro used for UTF or UCS endianness suffix. */ 998 #if WORDS_BIGENDIAN 999 #define ENDIAN_SUFFIX "BE" 1000 #else 1001 #define ENDIAN_SUFFIX "LE" 1002 #endif 1003 1004 /* The code below serves to generate a compile time error if 1005 gdb_wchar_t type is not of size 2 nor 4, despite the fact that 1006 macro __STDC_ISO_10646__ is defined. 1007 This is better than a gdb_assert call, because GDB cannot handle 1008 strings correctly if this size is different. */ 1009 1010 extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2 1011 || sizeof (gdb_wchar_t) == 4) 1012 ? 1 : -1]; 1013 1014 /* intermediate_encoding returns the charset unsed internally by 1015 GDB to convert between target and host encodings. As the test above 1016 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. 1017 UTF-16/32 is tested first, UCS-2/4 is tested as a second option, 1018 otherwise an error is generated. */ 1019 1020 const char * 1021 intermediate_encoding (void) 1022 { 1023 iconv_t desc; 1024 static const char *stored_result = NULL; 1025 char *result; 1026 int i; 1027 1028 if (stored_result) 1029 return stored_result; 1030 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), 1031 ENDIAN_SUFFIX); 1032 /* Check that the name is supported by iconv_open. */ 1033 desc = iconv_open (result, host_charset ()); 1034 if (desc != (iconv_t) -1) 1035 { 1036 iconv_close (desc); 1037 stored_result = result; 1038 return result; 1039 } 1040 /* Not valid, free the allocated memory. */ 1041 xfree (result); 1042 /* Second try, with UCS-2 type. */ 1043 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), 1044 ENDIAN_SUFFIX); 1045 /* Check that the name is supported by iconv_open. */ 1046 desc = iconv_open (result, host_charset ()); 1047 if (desc != (iconv_t) -1) 1048 { 1049 iconv_close (desc); 1050 stored_result = result; 1051 return result; 1052 } 1053 /* Not valid, free the allocated memory. */ 1054 xfree (result); 1055 /* No valid charset found, generate error here. */ 1056 error (_("Unable to find a vaild charset for string conversions")); 1057 } 1058 1059 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ 1060 1061 void 1062 _initialize_charset (void) 1063 { 1064 /* The first element is always "auto". */ 1065 VEC_safe_push (char_ptr, charsets, xstrdup ("auto")); 1066 find_charset_names (); 1067 1068 if (VEC_length (char_ptr, charsets) > 1) 1069 charset_enum = (const char **) VEC_address (char_ptr, charsets); 1070 else 1071 charset_enum = default_charset_names; 1072 1073 #ifndef PHONY_ICONV 1074 #ifdef HAVE_LANGINFO_CODESET 1075 /* The result of nl_langinfo may be overwritten later. This may 1076 leak a little memory, if the user later changes the host charset, 1077 but that doesn't matter much. */ 1078 auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); 1079 /* Solaris will return `646' here -- but the Solaris iconv then does 1080 not accept this. Darwin (and maybe FreeBSD) may return "" here, 1081 which GNU libiconv doesn't like (infinite loop). */ 1082 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) 1083 auto_host_charset_name = "ASCII"; 1084 auto_target_charset_name = auto_host_charset_name; 1085 #elif defined (USE_WIN32API) 1086 { 1087 /* "CP" + x<=5 digits + paranoia. */ 1088 static char w32_host_default_charset[16]; 1089 1090 snprintf (w32_host_default_charset, sizeof w32_host_default_charset, 1091 "CP%d", GetACP()); 1092 auto_host_charset_name = w32_host_default_charset; 1093 auto_target_charset_name = auto_host_charset_name; 1094 } 1095 #endif 1096 #endif 1097 1098 add_setshow_enum_cmd ("charset", class_support, 1099 charset_enum, &host_charset_name, _("\ 1100 Set the host and target character sets."), _("\ 1101 Show the host and target character sets."), _("\ 1102 The `host character set' is the one used by the system GDB is running on.\n\ 1103 The `target character set' is the one used by the program being debugged.\n\ 1104 You may only use supersets of ASCII for your host character set; GDB does\n\ 1105 not support any others.\n\ 1106 To see a list of the character sets GDB supports, type `set charset <TAB>'."), 1107 /* Note that the sfunc below needs to set 1108 target_charset_name, because the 'set 1109 charset' command sets two variables. */ 1110 set_charset_sfunc, 1111 show_charset, 1112 &setlist, &showlist); 1113 1114 add_setshow_enum_cmd ("host-charset", class_support, 1115 charset_enum, &host_charset_name, _("\ 1116 Set the host character set."), _("\ 1117 Show the host character set."), _("\ 1118 The `host character set' is the one used by the system GDB is running on.\n\ 1119 You may only use supersets of ASCII for your host character set; GDB does\n\ 1120 not support any others.\n\ 1121 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), 1122 set_host_charset_sfunc, 1123 show_host_charset_name, 1124 &setlist, &showlist); 1125 1126 add_setshow_enum_cmd ("target-charset", class_support, 1127 charset_enum, &target_charset_name, _("\ 1128 Set the target character set."), _("\ 1129 Show the target character set."), _("\ 1130 The `target character set' is the one used by the program being debugged.\n\ 1131 GDB translates characters and strings between the host and target\n\ 1132 character sets as needed.\n\ 1133 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), 1134 set_target_charset_sfunc, 1135 show_target_charset_name, 1136 &setlist, &showlist); 1137 1138 add_setshow_enum_cmd ("target-wide-charset", class_support, 1139 charset_enum, &target_wide_charset_name, 1140 _("\ 1141 Set the target wide character set."), _("\ 1142 Show the target wide character set."), _("\ 1143 The `target wide character set' is the one used by the program being debugged.\ 1144 \nIn particular it is the encoding used by `wchar_t'.\n\ 1145 GDB translates characters and strings between the host and target\n\ 1146 character sets as needed.\n\ 1147 To see a list of the character sets GDB supports, type\n\ 1148 `set target-wide-charset'<TAB>"), 1149 set_target_wide_charset_sfunc, 1150 show_target_wide_charset_name, 1151 &setlist, &showlist); 1152 } 1153