1 /* -*- Mode: C; tab-width: 3; indent-tabs-mode: nil; c-basic-offset: 3 -*- */
2
3 /*
4 * GImageView
5 * Copyright (C) 2001 Takuro Ashie
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 *
21 * $Id: charset.c,v 1.11 2004/09/21 08:44:31 makeinu Exp $
22 */
23
24 #include <string.h>
25 #include "charset.h"
26 #include "intl.h"
27 #include "japanese.h"
28
29 #ifdef USE_GTK2
30 #
31 #else
32 # ifdef HAVE_ICONV
33 # include <stdlib.h>
34 # include <iconv.h>
35 # include <errno.h>
36 # endif /* HAVE_ICONV */
37 #
38 # ifdef HAVE_LIBCHARSET
39 # include <libcharset.h>
40 # elif defined (HAVE_LANGINFO_CODESET)
41 # include <langinfo.h>
42 # elif defined (USE_INCLUDED_LIBINTL)
43 extern const char *locale_charset (void);
44 # else
45 # endif /* HAVE_LIBCHARSET */
46 #endif /* USE_GTK2 */
47
48
49 CharsetDetectLocaleFn charset_locale_fn_table[] = {
50 japanese_locale_charset,
51 };
52
53
54 #ifndef USE_GTK2
55 #ifndef HAVE_ICONV
56
57 /* See lib/encodings.def in libiconv */
58
59 gchar *charset_ascii_defs[] = {
60 CHARSET_ASCII,
61 "ASCII",
62 "ISO646-US",
63 "ISO_646.IRV:1991",
64 "ISO-IR-6",
65 "ANSI_X3.4-1968",
66 "ANSI_X3.4-1986",
67 "CP367",
68 "IBM367",
69 "US",
70 "csASCII",
71 };
72 gint charset_ascii_defs_num = sizeof (charset_ascii_defs) / sizeof (gchar*);
73
74 CharsetConvFn charset_conv_fn_table[] = {
75 japanese_conv,
76 };
77 #endif /* HAVE_ICONV */
78 #endif /* USE_GTK2 */
79
80
81 static gboolean
is_default_codeset(const gchar * charset)82 is_default_codeset (const gchar *charset)
83 {
84 if (!charset && !*charset)
85 return TRUE;
86
87 if (!g_strcasecmp ("default", charset)
88 || !g_strcasecmp ("none", charset)
89 || !g_strcasecmp ("auto", charset)
90 || !g_strcasecmp ("auto detect", charset)
91 || !g_strcasecmp ("auto-detect", charset)
92 || !g_strcasecmp ("auto_detect", charset))
93 {
94 return TRUE;
95 }
96
97 return FALSE;
98 }
99
100
101 /******************************************************************************
102 *
103 * known character set list
104 *
105 ******************************************************************************/
106 static const gchar *knwon_charset_items[] = {
107 "default",
108 CHARSET_ASCII,
109 CHARSET_JIS,
110 CHARSET_EUC_JP,
111 CHARSET_SJIS,
112 CHARSET_UTF8,
113 };
114
115 static GList *known_charset_list = NULL;
116
117 /* FIXME */
118 GList *
charset_get_known_list(const gchar * lang)119 charset_get_known_list (const gchar *lang)
120 {
121 gint i, num = sizeof (knwon_charset_items) / sizeof (gchar *);
122
123 if (known_charset_list) return known_charset_list;
124
125 for (i = 0; i < num; i++) {
126 known_charset_list = g_list_append (known_charset_list,
127 (gpointer) knwon_charset_items[i]);
128 }
129
130 return known_charset_list;
131 }
132
133
134 /******************************************************************************
135 *
136 * auto detect method for each language.
137 *
138 ******************************************************************************/
139 CharsetAutoDetectFn auto_detect_fn_table[] ={
140 NULL,
141 japanese_detect_charset,
142 };
143
144 const gchar *charset_auto_detect_labels[] = {
145 N_("None"),
146 N_("Japanese"),
147 NULL
148 };
149
150
151 CharsetAutoDetectFn
charset_get_auto_detect_func(CharsetAutoDetectType type)152 charset_get_auto_detect_func (CharsetAutoDetectType type)
153 {
154 guint num = sizeof (auto_detect_fn_table) / sizeof (CharsetAutoDetectFn);
155
156 if (type < 0 || type > num) return NULL;
157
158 return auto_detect_fn_table[type];
159 }
160
161
162 /******************************************************************************
163 *
164 * detecting locale & internal charset.
165 *
166 ******************************************************************************/
167 gchar *charset_locale = NULL;
168 gchar *charset_internal = NULL;
169
170
171 const gchar *
get_lang(void)172 get_lang (void)
173 {
174 const gchar *lang = NULL;
175
176 lang = g_getenv ("LANGUAGE");
177
178 if (!lang)
179 lang = g_getenv ("LC_ALL");
180
181 if (!lang)
182 lang = g_getenv ("LC_CTYPE");
183
184 if (!lang)
185 lang = g_getenv ("LC_MESSAGES");
186
187 if (!lang)
188 lang = g_getenv ("LANG");
189
190 if (!lang)
191 lang = "C";
192
193 return lang;
194 }
195
196
197 void
charset_set_locale_charset(const gchar * charset)198 charset_set_locale_charset (const gchar *charset)
199 {
200 if (charset_locale)
201 g_free (charset_locale);
202
203 if (charset && *charset) {
204 if (is_default_codeset (charset)) {
205 charset_locale = NULL;
206 } else {
207 charset_locale = g_strdup (charset);
208 }
209 } else {
210 charset_locale = NULL;
211 }
212 }
213
214
215 void
charset_set_internal_charset(const gchar * charset)216 charset_set_internal_charset (const gchar *charset)
217 {
218 if (charset_internal)
219 g_free (charset_internal);
220
221 if (charset && *charset) {
222 if (is_default_codeset (charset)) {
223 charset_internal = NULL;
224 } else {
225 charset_internal = g_strdup (charset);
226 }
227 } else {
228 charset_internal = NULL;
229 }
230 }
231
232
233 const gchar *
charset_get_locale(void)234 charset_get_locale (void)
235 {
236 const gchar *charset;
237
238 if (charset_locale && *charset_locale)
239 return charset_locale;
240
241 #ifdef USE_GTK2
242 if (!g_get_charset (&charset))
243 charset = NULL;
244 #elif defined (HAVE_LIBCHARSET)
245 charset = locale_charset ();
246 #elif defined (HAVE_LANGINFO_CODESET) && defined (HAVE_GLIBC21)
247 charset = nl_langinfo (CODESET);
248 #elif defined (USE_INCLUDED_LIBINTL)
249 charset = locale_charset ();
250 #else
251 #endif /* USE_GTK2 */
252
253 if (!charset || !*charset) {
254 gint i, num = sizeof (charset_locale_fn_table) / sizeof (CharsetDetectLocaleFn);
255 const gchar *lang;
256
257 lang = get_lang ();
258
259 for (i = 0; i < num; i++) {
260 charset = charset_locale_fn_table[i] (lang);
261 if (charset) break;
262 }
263 }
264
265 if (charset && *charset) {
266 if (charset_locale)
267 g_free (charset_locale);
268 charset_locale = g_strdup (charset);
269
270 return charset_locale;
271 }
272
273 return CHARSET_ASCII;
274 }
275
276
277 const gchar *
charset_get_internal(void)278 charset_get_internal (void)
279 {
280 const gchar *charset;
281
282 if (charset_internal && *charset_internal)
283 return charset_internal;
284
285 #ifdef USE_GTK2
286 charset = CHARSET_UTF8;
287 #else /* USE_GTK2 */
288 charset = charset_get_locale ();
289 #endif /* USE_GTK2 */
290
291 if (charset && *charset) {
292 if (charset_internal)
293 g_free (charset_internal);
294 charset_internal = g_strdup (charset);
295
296 return charset_internal;
297
298 } else {
299 return CHARSET_ASCII;
300 }
301 }
302
303
304
305 /******************************************************************************
306 *
307 * any code -> internal converter
308 *
309 ******************************************************************************/
310 gchar *
charset_to_internal(const gchar * src,const gchar * src_codeset,CharsetAutoDetectFn func,CharsetToInternalTypes type)311 charset_to_internal (const gchar *src,
312 const gchar *src_codeset,
313 CharsetAutoDetectFn func,
314 CharsetToInternalTypes type)
315 {
316 g_return_val_if_fail (src, NULL);
317
318 switch (type) {
319 case CHARSET_TO_INTERNAL_NEVER:
320 return g_strdup (src);
321 case CHARSET_TO_INTERNAL_LOCALE:
322 return charset_locale_to_internal (src);
323 case CHARSET_TO_INTERNAL_AUTO:
324 return charset_to_internal_auto (src, func);
325 case CHARSET_TO_INTERNAL_ANY:
326 if (is_default_codeset (src_codeset)) {
327 src_codeset = charset_get_locale ();
328 }
329 return charset_conv (src, src_codeset, charset_get_internal ());
330 default:
331 break;
332 }
333
334 return g_strdup (src);
335 }
336
337
338 gchar *
charset_locale_to_internal(const gchar * src)339 charset_locale_to_internal (const gchar *src)
340 {
341 g_return_val_if_fail (src, NULL);
342
343 #ifdef USE_GTK2
344 {
345 gssize len = -1;
346 gsize bytes_read, bytes_written;
347
348 return g_locale_to_utf8 (src, len, &bytes_read, &bytes_written, NULL);
349 }
350 #else /* USE_GTK2 */
351 return charset_conv (src, charset_get_locale (), charset_get_internal ());
352 #endif /* USE_GTK2 */
353 }
354
355
356 gchar *
charset_to_internal_auto(const gchar * src,CharsetAutoDetectFn func)357 charset_to_internal_auto (const gchar *src, CharsetAutoDetectFn func)
358 {
359 const gchar *charset = charset_get_internal();
360 if (charset)
361 return charset_conv_auto (src, charset, func);
362
363 return g_strdup (src);
364 }
365
366
367
368 /******************************************************************************
369 *
370 * any code -> locale converter
371 *
372 ******************************************************************************/
373 gchar *
charset_to_locale(const gchar * src,const gchar * src_codeset,CharsetAutoDetectFn func,CharsetToLocaleTypes type)374 charset_to_locale (const gchar *src,
375 const gchar *src_codeset,
376 CharsetAutoDetectFn func,
377 CharsetToLocaleTypes type)
378 {
379 g_return_val_if_fail (src, NULL);
380
381 switch (type) {
382 case CHARSET_TO_LOCALE_NEVER:
383 return g_strdup (src);
384 case CHARSET_TO_LOCALE_INTERNAL:
385 return charset_internal_to_locale (src);
386 case CHARSET_TO_LOCALE_AUTO:
387 return charset_to_locale_auto (src, func);
388 case CHARSET_TO_LOCALE_ANY:
389 if (is_default_codeset (src_codeset))
390 src_codeset = charset_get_internal ();
391 return charset_conv (src, src_codeset, charset_get_locale ());
392 default:
393 break;
394 }
395
396 return g_strdup (src);
397 }
398
399
400 gchar *
charset_internal_to_locale(const gchar * src)401 charset_internal_to_locale (const gchar *src)
402 {
403 g_return_val_if_fail (src, NULL);
404
405 #ifdef USE_GTK2
406 {
407 gssize len = -1;
408 gsize bytes_read, bytes_written;
409 return g_locale_from_utf8 (src, len, &bytes_read, &bytes_written, NULL);
410 }
411 #else /* USE_GTK2 */
412 return charset_conv (src, charset_get_internal (), charset_get_locale ());
413 #endif /* USE_GTK2 */
414 }
415
416
417 gchar *
charset_to_locale_auto(const gchar * src,CharsetAutoDetectFn func)418 charset_to_locale_auto (const gchar *src, CharsetAutoDetectFn func)
419 {
420 const gchar *dest_charset;
421
422 dest_charset = charset_get_locale ();
423 if (dest_charset)
424 return charset_conv_auto (src, dest_charset, func);
425 else
426 return g_strdup (src);
427
428 return g_strdup (src);
429 }
430
431
432
433 /******************************************************************************
434 *
435 * internal -> any code converter
436 *
437 ******************************************************************************/
438 gchar *
charset_from_internal(const gchar * src,const gchar * dest_codeset)439 charset_from_internal (const gchar *src,
440 const gchar *dest_codeset)
441 {
442 g_return_val_if_fail (src, NULL);
443 g_return_val_if_fail (dest_codeset && *dest_codeset, g_strdup (src));
444
445 return charset_conv (src, charset_get_internal (), dest_codeset);
446 }
447
448
449
450 /******************************************************************************
451 *
452 * locale -> any code converter
453 *
454 ******************************************************************************/
455 gchar *
charset_from_locale(const gchar * src,const gchar * dest_codeset)456 charset_from_locale (const gchar *src,
457 const gchar *dest_codeset)
458 {
459 g_return_val_if_fail (src, NULL);
460 g_return_val_if_fail (dest_codeset && *dest_codeset, g_strdup (src));
461
462 return charset_conv (src, charset_get_locale (), dest_codeset);
463 }
464
465
466
467 /******************************************************************************
468 *
469 * any -> any code converter
470 *
471 ******************************************************************************/
472 gchar *
charset_conv(const gchar * src,const gchar * src_codeset,const gchar * dest_codeset)473 charset_conv (const gchar *src,
474 const gchar *src_codeset,
475 const gchar *dest_codeset)
476 {
477 g_return_val_if_fail (src, NULL);
478 g_return_val_if_fail (src_codeset && *src_codeset, g_strdup (src));
479 g_return_val_if_fail (dest_codeset && *dest_codeset, g_strdup (src));
480
481 #ifdef USE_GTK2
482 {
483 gint rbytes, wbytes;
484 return g_convert (src, -1, dest_codeset, src_codeset,
485 &rbytes, &wbytes, NULL);
486 }
487 #else /* USE_GTK2 */
488 # ifdef HAVE_ICONV
489 {
490 unsigned char *buf, *ret;
491 iconv_t cd;
492 size_t insize = 0;
493 size_t outsize = 0;
494 size_t nconv = 0;
495 #ifdef ICONV_CONST
496 ICONV_CONST char *inptr;
497 #else /* ICONV_CONST */
498 char *inptr;
499 #endif
500 char *outptr;
501
502 buf = g_malloc (strlen (src) * 4 + 1);
503 if (!buf) return NULL;
504
505 insize = strlen (src);
506 inptr = (char *) src;
507 outsize = strlen (src) * 4 ;
508 outptr = buf;
509
510 cd = iconv_open (dest_codeset, src_codeset);
511 if (cd == (iconv_t) -1) {
512 switch (errno) {
513 case EINVAL:
514 g_free (buf);
515 return g_strdup (src);
516 default:
517 break;
518 }
519 }
520
521 nconv = iconv (cd, &inptr, &insize, &outptr, &outsize);
522 if (nconv == (size_t) -1) {
523 switch (errno) {
524 case EINVAL:
525 g_free (buf);
526 return g_strdup (src);
527 break;
528 default:
529 break;
530 }
531 } else {
532 iconv (cd, NULL, NULL, &outptr, &outsize);
533 }
534
535 *outptr = '\0';
536 iconv_close (cd);
537
538 ret = g_strdup (buf);
539 g_free(buf);
540
541 return ret;
542 }
543 #else /* HAVE_ICONV */
544 {
545 gint i, num = sizeof (charset_conv_fn_table) / sizeof (CharsetConvFn);
546 gchar *ret;
547
548 for (i = 0; i < num; i++) {
549 ret = charset_conv_fn_table[i] (src, src_codeset, dest_codeset);
550 if (ret) return ret;
551 }
552 }
553 #endif /* HAVE_ICONV */
554 #endif /* USE_GTK2 */
555
556 return g_strdup (src);
557 }
558
559
560 gchar *
charset_conv_auto(const gchar * src,const gchar * dest_codeset,CharsetAutoDetectFn func)561 charset_conv_auto (const gchar *src,
562 const gchar *dest_codeset,
563 CharsetAutoDetectFn func)
564 {
565 const gchar *src_codeset;
566
567 g_return_val_if_fail (src, NULL);
568 g_return_val_if_fail (func, g_strdup (src));
569 g_return_val_if_fail (dest_codeset && *dest_codeset, g_strdup (src));
570
571 src_codeset = func (src);
572
573 g_return_val_if_fail (src_codeset && *src_codeset, g_strdup (src));
574
575 return charset_conv (src, src_codeset, dest_codeset);
576 }
577
578
579
580 /******************************************************************************
581 *
582 * these codes are taken from GLib-2.0.0 (glib/gutf8.c)
583 *
584 * Copyright (C) 1999 Tom Tromey
585 * Copyright (C) 2000 Red Hat, Inc.
586 *
587 *****************************************************************************/
588 #ifndef USE_GTK2
589
/*
 * Classify the first byte of a UTF-8 sequence.
 *
 * Char: the lead byte (an unsigned value 0..255).
 * Mask: lvalue; receives the mask selecting the payload bits of Char.
 * Len:  lvalue; receives the sequence length in bytes (1..6), or -1 when
 *       Char cannot start a sequence (Mask is left untouched then).
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * e.g. as the body of an if/else.
 */
#define UTF8_COMPUTE(Char, Mask, Len)                                         \
  do {                                                                        \
    if ((Char) < 128)                                                         \
      {                                                                       \
        (Len) = 1;                                                            \
        (Mask) = 0x7f;                                                        \
      }                                                                       \
    else if (((Char) & 0xe0) == 0xc0)                                         \
      {                                                                       \
        (Len) = 2;                                                            \
        (Mask) = 0x1f;                                                        \
      }                                                                       \
    else if (((Char) & 0xf0) == 0xe0)                                         \
      {                                                                       \
        (Len) = 3;                                                            \
        (Mask) = 0x0f;                                                        \
      }                                                                       \
    else if (((Char) & 0xf8) == 0xf0)                                         \
      {                                                                       \
        (Len) = 4;                                                            \
        (Mask) = 0x07;                                                        \
      }                                                                       \
    else if (((Char) & 0xfc) == 0xf8)                                         \
      {                                                                       \
        (Len) = 5;                                                            \
        (Mask) = 0x03;                                                        \
      }                                                                       \
    else if (((Char) & 0xfe) == 0xfc)                                         \
      {                                                                       \
        (Len) = 6;                                                            \
        (Mask) = 0x01;                                                        \
      }                                                                       \
    else                                                                      \
      (Len) = -1;                                                             \
  } while (0)
623
624
/*
 * Number of bytes (1..6) the shortest UTF-8 encoding of code point Char
 * occupies.  Used to reject overlong encodings.
 */
#define UTF8_LENGTH(Char)              \
  ((Char) >= 0x4000000 ? 6 :           \
   (Char) >= 0x200000  ? 5 :           \
   (Char) >= 0x10000   ? 4 :           \
   (Char) >= 0x800     ? 3 :           \
   (Char) >= 0x80      ? 2 : 1)
631
632
/*
 * Decode one UTF-8 sequence.
 *
 * Result: lvalue; receives the decoded code point, or -1 when one of the
 *         trailing bytes is not a continuation byte (10xxxxxx).
 * Chars:  pointer to the first byte of the sequence.
 * Count:  lvalue used as the loop index.
 * Mask:   payload mask of the lead byte (as produced by UTF8_COMPUTE).
 * Len:    sequence length in bytes (as produced by UTF8_COMPUTE).
 *
 * Decoding stops at the first bad byte, so bytes after it are not read.
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len)                             \
  do {                                                                        \
    (Result) = (Chars)[0] & (Mask);                                           \
    for ((Count) = 1; (Count) < (Len); ++(Count))                             \
      {                                                                       \
        if (((Chars)[(Count)] & 0xc0) != 0x80)                                \
          {                                                                   \
            (Result) = -1;                                                    \
            break;                                                            \
          }                                                                   \
        (Result) <<= 6;                                                       \
        (Result) |= ((Chars)[(Count)] & 0x3f);                                \
      }                                                                       \
  } while (0)
645
/*
 * Non-zero when Char is an acceptable code point: below 0x110000, outside
 * the range 0xD800..0xDFFF, and neither 0xFFFE nor 0xFFFF.
 */
#define UNICODE_VALID(Char)                          \
    (!((Char) >= 0x110000 ||                         \
       ((Char) >= 0xD800 && (Char) < 0xE000) ||      \
       (Char) == 0xFFFE || (Char) == 0xFFFF))
650
651
652 gboolean
g_utf8_validate(const gchar * str,gssize max_len,const gchar ** end)653 g_utf8_validate (const gchar *str,
654 gssize max_len,
655 const gchar **end)
656 {
657 const gchar *p;
658
659 g_return_val_if_fail (str != NULL, FALSE);
660
661 if (end)
662 *end = str;
663
664 p = str;
665
666 while ((max_len < 0 || (p - str) < max_len) && *p) {
667 int i, mask = 0, len;
668 gunichar result;
669 unsigned char c = (unsigned char) *p;
670
671 UTF8_COMPUTE (c, mask, len);
672
673 if (len == -1)
674 break;
675
676 /* check that the expected number of bytes exists in str */
677 if (max_len >= 0 &&
678 ((max_len - (p - str)) < len))
679 break;
680
681 UTF8_GET (result, p, i, mask, len);
682
683 if (UTF8_LENGTH (result) != len) /* Check for overlong UTF-8 */
684 break;
685
686 if (result == (gunichar)-1)
687 break;
688
689 if (!UNICODE_VALID (result))
690 break;
691
692 p += len;
693 }
694
695 if (end)
696 *end = p;
697
698 /*
699 * See that we covered the entire length if a length was
700 * passed in, or that we ended on a nul if not
701 */
702 if (max_len >= 0 && p != (str + max_len))
703 return FALSE;
704 else if (max_len < 0 && *p != '\0')
705 return FALSE;
706 else
707 return TRUE;
708 }
709
710 #endif /* USE_GTK2 */
711