1 /* Character set conversion support for GDB. 2 3 Copyright (C) 2001, 2003, 2007-2012 Free Software Foundation, Inc. 4 5 This file is part of GDB. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 19 20 #include "defs.h" 21 #include "charset.h" 22 #include "gdbcmd.h" 23 #include "gdb_assert.h" 24 #include "gdb_obstack.h" 25 #include "gdb_wait.h" 26 #include "charset-list.h" 27 #include "vec.h" 28 #include "environ.h" 29 #include "arch-utils.h" 30 31 #include <stddef.h> 32 #include "gdb_string.h" 33 #include <ctype.h> 34 35 #ifdef USE_WIN32API 36 #include <windows.h> 37 #endif 38 39 /* How GDB's character set support works 40 41 GDB has three global settings: 42 43 - The `current host character set' is the character set GDB should 44 use in talking to the user, and which (hopefully) the user's 45 terminal knows how to display properly. Most users should not 46 change this. 47 48 - The `current target character set' is the character set the 49 program being debugged uses. 50 51 - The `current target wide character set' is the wide character set 52 the program being debugged uses, that is, the encoding used for 53 wchar_t. 54 55 There are commands to set each of these, and mechanisms for 56 choosing reasonable default values. GDB has a global list of 57 character sets that it can use as its host or target character 58 sets. 59 60 The header file `charset.h' declares various functions that 61 different pieces of GDB need to perform tasks like: 62 63 - printing target strings and characters to the user's terminal 64 (mostly target->host conversions), 65 66 - building target-appropriate representations of strings and 67 characters the user enters in expressions (mostly host->target 68 conversions), 69 70 and so on. 71 72 To avoid excessive code duplication and maintenance efforts, 73 GDB simply requires a capable iconv function. Users on platforms 74 without a suitable iconv can use the GNU iconv library. */ 75 76 77 #ifdef PHONY_ICONV 78 79 /* Provide a phony iconv that does as little as possible. Also, 80 arrange for there to be a single available character set. */ 81 82 #undef GDB_DEFAULT_HOST_CHARSET 83 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" 84 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 85 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" 86 #undef DEFAULT_CHARSET_NAMES 87 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , 88 89 #undef iconv_t 90 #define iconv_t int 91 #undef iconv_open 92 #define iconv_open phony_iconv_open 93 #undef iconv 94 #define iconv phony_iconv 95 #undef iconv_close 96 #define iconv_close phony_iconv_close 97 98 #undef ICONV_CONST 99 #define ICONV_CONST const 100 101 /* Some systems don't have EILSEQ, so we define it here, but not as 102 EINVAL, because callers of `iconv' want to distinguish EINVAL and 103 EILSEQ. This is what iconv.h from libiconv does as well. Note 104 that wchar.h may also define EILSEQ, so this needs to be after we 105 include wchar.h, which happens in defs.h through gdb_wchar.h. */ 106 #ifndef EILSEQ 107 #define EILSEQ ENOENT 108 #endif 109 110 iconv_t 111 phony_iconv_open (const char *to, const char *from) 112 { 113 /* We allow conversions from UTF-32BE, wchar_t, and the host charset. 114 We allow conversions to wchar_t and the host charset. */ 115 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") 116 && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) 117 return -1; 118 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) 119 return -1; 120 121 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is 122 used as a flag in calls to iconv. */ 123 return !strcmp (from, "UTF-32BE"); 124 } 125 126 int 127 phony_iconv_close (iconv_t arg) 128 { 129 return 0; 130 } 131 132 size_t 133 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, 134 char **outbuf, size_t *outbytesleft) 135 { 136 if (utf_flag) 137 { 138 while (*inbytesleft >= 4) 139 { 140 size_t j; 141 unsigned long c = 0; 142 143 for (j = 0; j < 4; ++j) 144 { 145 c <<= 8; 146 c += (*inbuf)[j] & 0xff; 147 } 148 149 if (c >= 256) 150 { 151 errno = EILSEQ; 152 return -1; 153 } 154 **outbuf = c & 0xff; 155 ++*outbuf; 156 --*outbytesleft; 157 158 ++*inbuf; 159 *inbytesleft -= 4; 160 } 161 if (*inbytesleft < 4) 162 { 163 errno = EINVAL; 164 return -1; 165 } 166 } 167 else 168 { 169 /* In all other cases we simply copy input bytes to the 170 output. */ 171 size_t amt = *inbytesleft; 172 173 if (amt > *outbytesleft) 174 amt = *outbytesleft; 175 memcpy (*outbuf, *inbuf, amt); 176 *inbuf += amt; 177 *outbuf += amt; 178 *inbytesleft -= amt; 179 *outbytesleft -= amt; 180 } 181 182 if (*inbytesleft) 183 { 184 errno = E2BIG; 185 return -1; 186 } 187 188 /* The number of non-reversible conversions -- but they were all 189 reversible. */ 190 return 0; 191 } 192 193 #endif 194 195 196 197 /* The global lists of character sets and translations. */ 198 199 200 #ifndef GDB_DEFAULT_TARGET_CHARSET 201 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 202 #endif 203 204 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET 205 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" 206 #endif 207 208 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; 209 static const char *host_charset_name = "auto"; 210 static void 211 show_host_charset_name (struct ui_file *file, int from_tty, 212 struct cmd_list_element *c, 213 const char *value) 214 { 215 if (!strcmp (value, "auto")) 216 fprintf_filtered (file, 217 _("The host character set is \"auto; currently %s\".\n"), 218 auto_host_charset_name); 219 else 220 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); 221 } 222 223 static const char *target_charset_name = "auto"; 224 static void 225 show_target_charset_name (struct ui_file *file, int from_tty, 226 struct cmd_list_element *c, const char *value) 227 { 228 if (!strcmp (value, "auto")) 229 fprintf_filtered (file, 230 _("The target character set is \"auto; " 231 "currently %s\".\n"), 232 gdbarch_auto_charset (get_current_arch ())); 233 else 234 fprintf_filtered (file, _("The target character set is \"%s\".\n"), 235 value); 236 } 237 238 static const char *target_wide_charset_name = "auto"; 239 static void 240 show_target_wide_charset_name (struct ui_file *file, 241 int from_tty, 242 struct cmd_list_element *c, 243 const char *value) 244 { 245 if (!strcmp (value, "auto")) 246 fprintf_filtered (file, 247 _("The target wide character set is \"auto; " 248 "currently %s\".\n"), 249 gdbarch_auto_wide_charset (get_current_arch ())); 250 else 251 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), 252 value); 253 } 254 255 static const char *default_charset_names[] = 256 { 257 DEFAULT_CHARSET_NAMES 258 0 259 }; 260 261 static const char **charset_enum; 262 263 264 /* If the target wide character set has big- or little-endian 265 variants, these are the corresponding names. */ 266 static const char *target_wide_charset_be_name; 267 static const char *target_wide_charset_le_name; 268 269 /* The architecture for which the BE- and LE-names are valid. */ 270 static struct gdbarch *be_le_arch; 271 272 /* A helper function which sets the target wide big- and little-endian 273 character set names, if possible. */ 274 275 static void 276 set_be_le_names (struct gdbarch *gdbarch) 277 { 278 int i, len; 279 const char *target_wide; 280 281 if (be_le_arch == gdbarch) 282 return; 283 be_le_arch = gdbarch; 284 285 target_wide_charset_le_name = NULL; 286 target_wide_charset_be_name = NULL; 287 288 target_wide = target_wide_charset_name; 289 if (!strcmp (target_wide, "auto")) 290 target_wide = gdbarch_auto_wide_charset (gdbarch); 291 292 len = strlen (target_wide); 293 for (i = 0; charset_enum[i]; ++i) 294 { 295 if (strncmp (target_wide, charset_enum[i], len)) 296 continue; 297 if ((charset_enum[i][len] == 'B' 298 || charset_enum[i][len] == 'L') 299 && charset_enum[i][len + 1] == 'E' 300 && charset_enum[i][len + 2] == '\0') 301 { 302 if (charset_enum[i][len] == 'B') 303 target_wide_charset_be_name = charset_enum[i]; 304 else 305 target_wide_charset_le_name = charset_enum[i]; 306 } 307 } 308 } 309 310 /* 'Set charset', 'set host-charset', 'set target-charset', 'set 311 target-wide-charset', 'set charset' sfunc's. */ 312 313 static void 314 validate (struct gdbarch *gdbarch) 315 { 316 iconv_t desc; 317 const char *host_cset = host_charset (); 318 const char *target_cset = target_charset (gdbarch); 319 const char *target_wide_cset = target_wide_charset_name; 320 321 if (!strcmp (target_wide_cset, "auto")) 322 target_wide_cset = gdbarch_auto_wide_charset (gdbarch); 323 324 desc = iconv_open (target_wide_cset, host_cset); 325 if (desc == (iconv_t) -1) 326 error (_("Cannot convert between character sets `%s' and `%s'"), 327 target_wide_cset, host_cset); 328 iconv_close (desc); 329 330 desc = iconv_open (target_cset, host_cset); 331 if (desc == (iconv_t) -1) 332 error (_("Cannot convert between character sets `%s' and `%s'"), 333 target_cset, host_cset); 334 iconv_close (desc); 335 336 /* Clear the cache. */ 337 be_le_arch = NULL; 338 } 339 340 /* This is the sfunc for the 'set charset' command. */ 341 static void 342 set_charset_sfunc (char *charset, int from_tty, 343 struct cmd_list_element *c) 344 { 345 /* CAREFUL: set the target charset here as well. */ 346 target_charset_name = host_charset_name; 347 validate (get_current_arch ()); 348 } 349 350 /* 'set host-charset' command sfunc. We need a wrapper here because 351 the function needs to have a specific signature. */ 352 static void 353 set_host_charset_sfunc (char *charset, int from_tty, 354 struct cmd_list_element *c) 355 { 356 validate (get_current_arch ()); 357 } 358 359 /* Wrapper for the 'set target-charset' command. */ 360 static void 361 set_target_charset_sfunc (char *charset, int from_tty, 362 struct cmd_list_element *c) 363 { 364 validate (get_current_arch ()); 365 } 366 367 /* Wrapper for the 'set target-wide-charset' command. */ 368 static void 369 set_target_wide_charset_sfunc (char *charset, int from_tty, 370 struct cmd_list_element *c) 371 { 372 validate (get_current_arch ()); 373 } 374 375 /* sfunc for the 'show charset' command. */ 376 static void 377 show_charset (struct ui_file *file, int from_tty, 378 struct cmd_list_element *c, 379 const char *name) 380 { 381 show_host_charset_name (file, from_tty, c, host_charset_name); 382 show_target_charset_name (file, from_tty, c, target_charset_name); 383 show_target_wide_charset_name (file, from_tty, c, 384 target_wide_charset_name); 385 } 386 387 388 /* Accessor functions. */ 389 390 const char * 391 host_charset (void) 392 { 393 if (!strcmp (host_charset_name, "auto")) 394 return auto_host_charset_name; 395 return host_charset_name; 396 } 397 398 const char * 399 target_charset (struct gdbarch *gdbarch) 400 { 401 if (!strcmp (target_charset_name, "auto")) 402 return gdbarch_auto_charset (gdbarch); 403 return target_charset_name; 404 } 405 406 const char * 407 target_wide_charset (struct gdbarch *gdbarch) 408 { 409 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); 410 411 set_be_le_names (gdbarch); 412 if (byte_order == BFD_ENDIAN_BIG) 413 { 414 if (target_wide_charset_be_name) 415 return target_wide_charset_be_name; 416 } 417 else 418 { 419 if (target_wide_charset_le_name) 420 return target_wide_charset_le_name; 421 } 422 423 if (!strcmp (target_wide_charset_name, "auto")) 424 return gdbarch_auto_wide_charset (gdbarch); 425 426 return target_wide_charset_name; 427 } 428 429 430 /* Host character set management. For the time being, we assume that 431 the host character set is some superset of ASCII. */ 432 433 char 434 host_letter_to_control_character (char c) 435 { 436 if (c == '?') 437 return 0177; 438 return c & 0237; 439 } 440 441 /* Convert a host character, C, to its hex value. C must already have 442 been validated using isxdigit. */ 443 444 int 445 host_hex_value (char c) 446 { 447 if (isdigit (c)) 448 return c - '0'; 449 if (c >= 'a' && c <= 'f') 450 return 10 + c - 'a'; 451 gdb_assert (c >= 'A' && c <= 'F'); 452 return 10 + c - 'A'; 453 } 454 455 456 /* Public character management functions. */ 457 458 /* A cleanup function which is run to close an iconv descriptor. */ 459 460 static void 461 cleanup_iconv (void *p) 462 { 463 iconv_t *descp = p; 464 iconv_close (*descp); 465 } 466 467 static size_t 468 convert_wchar (gdb_wchar_t **pinp, size_t *pinleft, char **poutp, size_t *poutleft) 469 { 470 char tmp[MB_CUR_MAX]; 471 int r; 472 473 while (*pinleft >= sizeof(gdb_wchar_t)) 474 { 475 r = wctomb(tmp, **pinp); 476 477 if (r == -1) 478 perror_with_name ("Internal error while converting character sets"); 479 480 if (*poutleft < r) 481 { 482 errno = E2BIG; 483 return (size_t) -1; 484 } 485 486 memcpy(*poutp, tmp, r); 487 *poutp += r; 488 *poutleft -= r; 489 ++*pinp; 490 *pinleft -= sizeof(gdb_wchar_t); 491 } 492 493 if (*pinleft != 0) 494 return EINVAL; 495 496 return 0; 497 } 498 499 void 500 convert_between_encodings (const char *from, const char *to, 501 const gdb_byte *bytes, unsigned int num_bytes, 502 int width, struct obstack *output, 503 enum transliterations translit) 504 { 505 iconv_t desc; 506 struct cleanup *cleanups; 507 size_t inleft; 508 char *inp; 509 unsigned int space_request; 510 int use_wctomb = 0; 511 512 /* Often, the host and target charsets will be the same. */ 513 if (!strcmp (from, to)) 514 { 515 obstack_grow (output, bytes, num_bytes); 516 return; 517 } 518 519 if (!strcmp (from, "wchar_t")) 520 { 521 if (strcmp (to, host_charset ())) 522 perror_with_name (_("Converting character sets")); 523 cleanups = NULL; /* silence gcc complaints */ 524 use_wctomb = 1; 525 } 526 else 527 { 528 desc = iconv_open (to, from); 529 if (desc == (iconv_t) -1) 530 perror_with_name (_("Converting character sets")); 531 cleanups = make_cleanup (cleanup_iconv, &desc); 532 } 533 534 inleft = num_bytes; 535 inp = (char *) bytes; 536 537 space_request = num_bytes; 538 539 while (inleft > 0) 540 { 541 char *outp; 542 size_t outleft, r; 543 int old_size; 544 545 old_size = obstack_object_size (output); 546 obstack_blank (output, space_request); 547 548 outp = obstack_base (output) + old_size; 549 outleft = space_request; 550 551 if (use_wctomb) 552 r = convert_wchar((gdb_wchar_t **)(void *)&inp, &inleft, &outp, &outleft); 553 else 554 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft); 555 556 /* Now make sure that the object on the obstack only includes 557 bytes we have converted. */ 558 obstack_blank (output, - (int) outleft); 559 560 if (r == (size_t) -1) 561 { 562 switch (errno) 563 { 564 case EILSEQ: 565 { 566 int i; 567 568 /* Invalid input sequence. */ 569 if (translit == translit_none) 570 error (_("Could not convert character " 571 "to `%s' character set"), to); 572 573 /* We emit escape sequence for the bytes, skip them, 574 and try again. */ 575 for (i = 0; i < width; ++i) 576 { 577 char octal[5]; 578 579 sprintf (octal, "\\%.3o", *inp & 0xff); 580 obstack_grow_str (output, octal); 581 582 ++inp; 583 --inleft; 584 } 585 } 586 break; 587 588 case E2BIG: 589 /* We ran out of space in the output buffer. Make it 590 bigger next time around. */ 591 space_request *= 2; 592 break; 593 594 case EINVAL: 595 /* Incomplete input sequence. FIXME: ought to report this 596 to the caller somehow. */ 597 inleft = 0; 598 break; 599 600 default: 601 perror_with_name (_("Internal error while " 602 "converting character sets")); 603 } 604 } 605 } 606 607 if (!use_wctomb) 608 do_cleanups (cleanups); 609 } 610 611 612 613 /* An iterator that returns host wchar_t's from a target string. */ 614 struct wchar_iterator 615 { 616 /* The underlying iconv descriptor. */ 617 iconv_t desc; 618 619 /* The input string. This is updated as convert characters. */ 620 char *input; 621 /* The number of bytes remaining in the input. */ 622 size_t bytes; 623 624 /* The width of an input character. */ 625 size_t width; 626 627 /* The intermediate buffer */ 628 char *inter; 629 size_t inter_size; 630 size_t inter_len; 631 632 /* The output byte. */ 633 gdb_wchar_t out; 634 }; 635 636 /* Create a new iterator. */ 637 struct wchar_iterator * 638 make_wchar_iterator (const gdb_byte *input, size_t bytes, 639 const char *charset, size_t width) 640 { 641 struct wchar_iterator *result; 642 iconv_t desc; 643 644 desc = iconv_open (host_charset (), charset); 645 if (desc == (iconv_t) -1) 646 perror_with_name (_("Converting character sets")); 647 648 result = XNEW (struct wchar_iterator); 649 result->desc = desc; 650 result->input = (char *) input; 651 result->bytes = bytes; 652 result->width = width; 653 654 result->inter = XNEW (char); 655 result->inter_size = 1; 656 result->inter_len = 0; 657 658 return result; 659 } 660 661 static void 662 do_cleanup_iterator (void *p) 663 { 664 struct wchar_iterator *iter = p; 665 666 iconv_close (iter->desc); 667 xfree (iter->inter); 668 xfree (iter); 669 } 670 671 struct cleanup * 672 make_cleanup_wchar_iterator (struct wchar_iterator *iter) 673 { 674 return make_cleanup (do_cleanup_iterator, iter); 675 } 676 677 int 678 wchar_iterate (struct wchar_iterator *iter, 679 enum wchar_iterate_result *out_result, 680 gdb_wchar_t **out_chars, 681 const gdb_byte **ptr, 682 size_t *len) 683 { 684 size_t out_request; 685 char *orig_inptr = iter->input; 686 size_t orig_in = iter->bytes; 687 688 /* Try to convert some characters. At first we try to convert just 689 a single character. The reason for this is that iconv does not 690 necessarily update its outgoing arguments when it encounters an 691 invalid input sequence -- but we want to reliably report this to 692 our caller so it can emit an escape sequence. */ 693 while (iter->inter_len == 0 && iter->bytes > 0) 694 { 695 out_request = 1; 696 while (iter->bytes > 0) 697 { 698 char *outptr = (char *) &iter->inter[iter->inter_len]; 699 size_t out_avail = out_request; 700 701 size_t r = iconv (iter->desc, 702 (ICONV_CONST char **) &iter->input, &iter->bytes, 703 &outptr, &out_avail); 704 if (r == (size_t) -1) 705 { 706 switch (errno) 707 { 708 case EILSEQ: 709 /* Invalid input sequence. Skip it, and let the caller 710 know about it. */ 711 *out_result = wchar_iterate_invalid; 712 *ptr = iter->input; 713 *len = iter->width; 714 iter->input += iter->width; 715 iter->bytes -= iter->width; 716 return 0; 717 718 case E2BIG: 719 /* We ran out of space. We still might have converted a 720 character; if so, return it. Otherwise, grow the 721 buffer and try again. */ 722 if (out_avail < out_request) 723 break; 724 725 ++out_request; 726 if (out_request > iter->inter_size) 727 { 728 iter->inter_size = out_request; 729 iter->inter = xrealloc (iter->inter, out_request); 730 } 731 continue; 732 733 case EINVAL: 734 /* Incomplete input sequence. Let the caller know, and 735 arrange for future calls to see EOF. */ 736 *out_result = wchar_iterate_incomplete; 737 *ptr = iter->input; 738 *len = iter->bytes; 739 iter->bytes = 0; 740 return 0; 741 742 default: 743 perror_with_name (_("Internal error while " 744 "converting character sets")); 745 } 746 } 747 748 /* We converted something. */ 749 iter->inter_len += out_request - out_avail; 750 break; 751 } 752 } 753 754 if (iter->inter_len > 0) 755 { 756 int r; 757 758 /* Now convert from our charset to wchar_t */ 759 r = mbtowc(&iter->out, &iter->inter[0], iter->inter_len); 760 761 /* This must never happen: we just converted to a valid charset! */ 762 if (r < 0) 763 perror_with_name (_("Internal error while " 764 "converting character sets")); 765 766 /* NUL bytes are alright */ 767 if (r == 0) 768 r = 1; 769 770 iter->inter_len -= r; 771 memmove(&iter->inter[0], &iter->inter[r], iter->inter_len); 772 773 *out_result = wchar_iterate_ok; 774 *out_chars = &iter->out; 775 *ptr = orig_inptr; 776 *len = orig_in - iter->bytes; 777 return 1; 778 } 779 780 /* Really done. */ 781 *out_result = wchar_iterate_eof; 782 return -1; 783 } 784 785 786 /* The charset.c module initialization function. */ 787 788 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ 789 790 DEF_VEC_P (char_ptr); 791 792 static VEC (char_ptr) *charsets; 793 794 #ifdef PHONY_ICONV 795 796 static void 797 find_charset_names (void) 798 { 799 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); 800 VEC_safe_push (char_ptr, charsets, NULL); 801 } 802 803 #else /* PHONY_ICONV */ 804 805 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but 806 provides different symbols in the static and dynamic libraries. 807 So, configure may see libiconvlist but not iconvlist. But, calling 808 iconvlist is the right thing to do and will work. Hence we do a 809 check here but unconditionally call iconvlist below. */ 810 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) 811 812 /* A helper function that adds some character sets to the vector of 813 all character sets. This is a callback function for iconvlist. */ 814 815 static int 816 add_one (unsigned int count, const char *const *names, void *data) 817 { 818 unsigned int i; 819 820 for (i = 0; i < count; ++i) 821 VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); 822 823 return 0; 824 } 825 826 static void 827 find_charset_names (void) 828 { 829 iconvlist (add_one, NULL); 830 VEC_safe_push (char_ptr, charsets, NULL); 831 } 832 833 #else 834 835 /* Return non-zero if LINE (output from iconv) should be ignored. 836 Older iconv programs (e.g. 2.2.2) include the human readable 837 introduction even when stdout is not a tty. Newer versions omit 838 the intro if stdout is not a tty. */ 839 840 static int 841 ignore_line_p (const char *line) 842 { 843 /* This table is used to filter the output. If this text appears 844 anywhere in the line, it is ignored (strstr is used). */ 845 static const char * const ignore_lines[] = 846 { 847 "The following", 848 "not necessarily", 849 "the FROM and TO", 850 "listed with several", 851 NULL 852 }; 853 int i; 854 855 for (i = 0; ignore_lines[i] != NULL; ++i) 856 { 857 if (strstr (line, ignore_lines[i]) != NULL) 858 return 1; 859 } 860 861 return 0; 862 } 863 864 static void 865 find_charset_names (void) 866 { 867 struct pex_obj *child; 868 char *args[3]; 869 int err, status; 870 int fail = 1; 871 int flags; 872 struct gdb_environ *iconv_env; 873 char *iconv_program; 874 875 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is 876 not a tty. We need to recognize it and ignore it. This text is 877 subject to translation, so force LANGUAGE=C. */ 878 iconv_env = make_environ (); 879 init_environ (iconv_env); 880 set_in_environ (iconv_env, "LANGUAGE", "C"); 881 set_in_environ (iconv_env, "LC_ALL", "C"); 882 883 child = pex_init (PEX_USE_PIPES, "iconv", NULL); 884 885 #ifdef ICONV_BIN 886 { 887 char *iconv_dir = relocate_gdb_directory (ICONV_BIN, 888 ICONV_BIN_RELOCATABLE); 889 iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL); 890 xfree (iconv_dir); 891 } 892 #else 893 iconv_program = xstrdup ("iconv"); 894 #endif 895 args[0] = iconv_program; 896 args[1] = "-l"; 897 args[2] = NULL; 898 flags = PEX_STDERR_TO_STDOUT; 899 #ifndef ICONV_BIN 900 flags |= PEX_SEARCH; 901 #endif 902 /* Note that we simply ignore errors here. */ 903 if (!pex_run_in_environment (child, flags, 904 args[0], args, environ_vector (iconv_env), 905 NULL, NULL, &err)) 906 { 907 FILE *in = pex_read_output (child, 0); 908 909 /* POSIX says that iconv -l uses an unspecified format. We 910 parse the glibc and libiconv formats; feel free to add others 911 as needed. */ 912 913 while (!feof (in)) 914 { 915 /* The size of buf is chosen arbitrarily. */ 916 char buf[1024]; 917 char *start, *r; 918 int len; 919 920 r = fgets (buf, sizeof (buf), in); 921 if (!r) 922 break; 923 len = strlen (r); 924 if (len <= 3) 925 continue; 926 if (ignore_line_p (r)) 927 continue; 928 929 /* Strip off the newline. */ 930 --len; 931 /* Strip off one or two '/'s. glibc will print lines like 932 "8859_7//", but also "10646-1:1993/UCS4/". */ 933 if (buf[len - 1] == '/') 934 --len; 935 if (buf[len - 1] == '/') 936 --len; 937 buf[len] = '\0'; 938 939 /* libiconv will print multiple entries per line, separated 940 by spaces. Older iconvs will print multiple entries per 941 line, indented by two spaces, and separated by ", " 942 (i.e. the human readable form). */ 943 start = buf; 944 while (1) 945 { 946 int keep_going; 947 char *p; 948 949 /* Skip leading blanks. */ 950 for (p = start; *p && *p == ' '; ++p) 951 ; 952 start = p; 953 /* Find the next space, comma, or end-of-line. */ 954 for ( ; *p && *p != ' ' && *p != ','; ++p) 955 ; 956 /* Ignore an empty result. */ 957 if (p == start) 958 break; 959 keep_going = *p; 960 *p = '\0'; 961 VEC_safe_push (char_ptr, charsets, xstrdup (start)); 962 if (!keep_going) 963 break; 964 /* Skip any extra spaces. */ 965 for (start = p + 1; *start && *start == ' '; ++start) 966 ; 967 } 968 } 969 970 if (pex_get_status (child, 1, &status) 971 && WIFEXITED (status) && !WEXITSTATUS (status)) 972 fail = 0; 973 974 } 975 976 xfree (iconv_program); 977 pex_free (child); 978 free_environ (iconv_env); 979 980 if (fail) 981 { 982 /* Some error occurred, so drop the vector. */ 983 int ix; 984 char *elt; 985 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix) 986 xfree (elt); 987 VEC_truncate (char_ptr, charsets, 0); 988 } 989 else 990 VEC_safe_push (char_ptr, charsets, NULL); 991 } 992 993 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ 994 #endif /* PHONY_ICONV */ 995 996 /* The "auto" target charset used by default_auto_charset. */ 997 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; 998 999 const char * 1000 default_auto_charset (void) 1001 { 1002 return auto_target_charset_name; 1003 } 1004 1005 const char * 1006 default_auto_wide_charset (void) 1007 { 1008 return GDB_DEFAULT_TARGET_WIDE_CHARSET; 1009 } 1010 1011 1012 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION 1013 /* Macro used for UTF or UCS endianness suffix. */ 1014 #if WORDS_BIGENDIAN 1015 #define ENDIAN_SUFFIX "BE" 1016 #else 1017 #define ENDIAN_SUFFIX "LE" 1018 #endif 1019 1020 /* The code below serves to generate a compile time error if 1021 gdb_wchar_t type is not of size 2 nor 4, despite the fact that 1022 macro __STDC_ISO_10646__ is defined. 1023 This is better than a gdb_assert call, because GDB cannot handle 1024 strings correctly if this size is different. */ 1025 1026 extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2 1027 || sizeof (gdb_wchar_t) == 4) 1028 ? 1 : -1]; 1029 1030 /* intermediate_encoding returns the charset unsed internally by 1031 GDB to convert between target and host encodings. As the test above 1032 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. 1033 UTF-16/32 is tested first, UCS-2/4 is tested as a second option, 1034 otherwise an error is generated. */ 1035 1036 const char * 1037 intermediate_encoding (void) 1038 { 1039 iconv_t desc; 1040 static const char *stored_result = NULL; 1041 char *result; 1042 int i; 1043 1044 if (stored_result) 1045 return stored_result; 1046 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), 1047 ENDIAN_SUFFIX); 1048 /* Check that the name is supported by iconv_open. */ 1049 desc = iconv_open (result, host_charset ()); 1050 if (desc != (iconv_t) -1) 1051 { 1052 iconv_close (desc); 1053 stored_result = result; 1054 return result; 1055 } 1056 /* Not valid, free the allocated memory. */ 1057 xfree (result); 1058 /* Second try, with UCS-2 type. */ 1059 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), 1060 ENDIAN_SUFFIX); 1061 /* Check that the name is supported by iconv_open. */ 1062 desc = iconv_open (result, host_charset ()); 1063 if (desc != (iconv_t) -1) 1064 { 1065 iconv_close (desc); 1066 stored_result = result; 1067 return result; 1068 } 1069 /* Not valid, free the allocated memory. */ 1070 xfree (result); 1071 /* No valid charset found, generate error here. */ 1072 error (_("Unable to find a vaild charset for string conversions")); 1073 } 1074 1075 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ 1076 1077 void 1078 _initialize_charset (void) 1079 { 1080 /* The first element is always "auto". */ 1081 VEC_safe_push (char_ptr, charsets, xstrdup ("auto")); 1082 find_charset_names (); 1083 1084 if (VEC_length (char_ptr, charsets) > 1) 1085 charset_enum = (const char **) VEC_address (char_ptr, charsets); 1086 else 1087 charset_enum = default_charset_names; 1088 1089 #ifndef PHONY_ICONV 1090 #ifdef HAVE_LANGINFO_CODESET 1091 /* The result of nl_langinfo may be overwritten later. This may 1092 leak a little memory, if the user later changes the host charset, 1093 but that doesn't matter much. */ 1094 auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); 1095 /* Solaris will return `646' here -- but the Solaris iconv then does 1096 not accept this. Darwin (and maybe FreeBSD) may return "" here, 1097 which GNU libiconv doesn't like (infinite loop). */ 1098 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) 1099 auto_host_charset_name = "ASCII"; 1100 auto_target_charset_name = auto_host_charset_name; 1101 #elif defined (USE_WIN32API) 1102 { 1103 /* "CP" + x<=5 digits + paranoia. */ 1104 static char w32_host_default_charset[16]; 1105 1106 snprintf (w32_host_default_charset, sizeof w32_host_default_charset, 1107 "CP%d", GetACP()); 1108 auto_host_charset_name = w32_host_default_charset; 1109 auto_target_charset_name = auto_host_charset_name; 1110 } 1111 #endif 1112 #endif 1113 1114 add_setshow_enum_cmd ("charset", class_support, 1115 charset_enum, &host_charset_name, _("\ 1116 Set the host and target character sets."), _("\ 1117 Show the host and target character sets."), _("\ 1118 The `host character set' is the one used by the system GDB is running on.\n\ 1119 The `target character set' is the one used by the program being debugged.\n\ 1120 You may only use supersets of ASCII for your host character set; GDB does\n\ 1121 not support any others.\n\ 1122 To see a list of the character sets GDB supports, type `set charset <TAB>'."), 1123 /* Note that the sfunc below needs to set 1124 target_charset_name, because the 'set 1125 charset' command sets two variables. */ 1126 set_charset_sfunc, 1127 show_charset, 1128 &setlist, &showlist); 1129 1130 add_setshow_enum_cmd ("host-charset", class_support, 1131 charset_enum, &host_charset_name, _("\ 1132 Set the host character set."), _("\ 1133 Show the host character set."), _("\ 1134 The `host character set' is the one used by the system GDB is running on.\n\ 1135 You may only use supersets of ASCII for your host character set; GDB does\n\ 1136 not support any others.\n\ 1137 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), 1138 set_host_charset_sfunc, 1139 show_host_charset_name, 1140 &setlist, &showlist); 1141 1142 add_setshow_enum_cmd ("target-charset", class_support, 1143 charset_enum, &target_charset_name, _("\ 1144 Set the target character set."), _("\ 1145 Show the target character set."), _("\ 1146 The `target character set' is the one used by the program being debugged.\n\ 1147 GDB translates characters and strings between the host and target\n\ 1148 character sets as needed.\n\ 1149 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), 1150 set_target_charset_sfunc, 1151 show_target_charset_name, 1152 &setlist, &showlist); 1153 1154 add_setshow_enum_cmd ("target-wide-charset", class_support, 1155 charset_enum, &target_wide_charset_name, 1156 _("\ 1157 Set the target wide character set."), _("\ 1158 Show the target wide character set."), _("\ 1159 The `target wide character set' is the one used by the program being debugged.\ 1160 \nIn particular it is the encoding used by `wchar_t'.\n\ 1161 GDB translates characters and strings between the host and target\n\ 1162 character sets as needed.\n\ 1163 To see a list of the character sets GDB supports, type\n\ 1164 `set target-wide-charset'<TAB>"), 1165 set_target_wide_charset_sfunc, 1166 show_target_wide_charset_name, 1167 &setlist, &showlist); 1168 } 1169