1 /*
2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3  * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #  include "config.h"
22 #include "claws-features.h"
23 #endif
24 
25 #include "defs.h"
26 
27 #include <glib.h>
28 #include <glib/gi18n.h>
29 #include <string.h>
30 #include <ctype.h>
31 #include <stdlib.h>
32 #include <errno.h>
33 
34 #if HAVE_LOCALE_H
35 #  include <locale.h>
36 #endif
37 
38 #include "codeconv.h"
39 #include "unmime.h"
40 #include "quoted-printable.h"
41 #include "utils.h"
42 
/*
 * For unknown reasons the iconv.m4 macro undefines ICONV_CONST when no
 * const qualifier is needed.  The iconv() call in this file casts through
 * ICONV_CONST, so provide an empty fallback definition.
 */
#if !defined(ICONV_CONST)
#  define ICONV_CONST
#endif
48 
/* Character set currently selected in an ISO-2022-JP byte stream. */
typedef enum
{
	JIS_ASCII,	/* selected by ESC ( B */
	JIS_KANJI,	/* selected by ESC $ B */
	JIS_HWKANA,	/* selected by ESC ( I */
	JIS_AUXKANJI	/* selected by ESC $ ( D */
} JISState;

/*
 * Replacement written for bytes that cannot be converted ('_').
 * Deliberately defined without a trailing semicolon so that it can be
 * used inside expressions as well as in plain assignments.
 */
#define SUBST_CHAR	0x5f
#define ESC		'\033'

/* Byte classification helpers; only the low 8 bits of c are examined. */
#define iseuckanji(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
#define iseuchwkana1(c) \
	(((c) & 0xff) == 0x8e)
#define iseuchwkana2(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
#define iseucaux(c) \
	(((c) & 0xff) == 0x8f)
#define issjiskanji1(c) \
	((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
	 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
#define issjiskanji2(c) \
	((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
	 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
#define issjishwkana(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)

/*
 * Shift helpers for the JIS encoder.  Each one expects two locals in the
 * calling function: "state" (JISState) and "out" (output write pointer).
 * If the requested state is not the current one, the matching escape
 * sequence is appended at "out" and "state" is updated.  They are wrapped
 * in do { } while (0) so that they behave as a single statement, also in
 * an unbraced if/else.
 */
#define K_IN()					\
	do {					\
		if (state != JIS_KANJI) {	\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = 'B';		\
			state = JIS_KANJI;	\
		}				\
	} while (0)

#define K_OUT()					\
	do {					\
		if (state != JIS_ASCII) {	\
			*out++ = ESC;		\
			*out++ = '(';		\
			*out++ = 'B';		\
			state = JIS_ASCII;	\
		}				\
	} while (0)

#define HW_IN()					\
	do {					\
		if (state != JIS_HWKANA) {	\
			*out++ = ESC;		\
			*out++ = '(';		\
			*out++ = 'I';		\
			state = JIS_HWKANA;	\
		}				\
	} while (0)

#define AUX_IN()				\
	do {					\
		if (state != JIS_AUXKANJI) {	\
			*out++ = ESC;		\
			*out++ = '$';		\
			*out++ = '(';		\
			*out++ = 'D';		\
			state = JIS_AUXKANJI;	\
		}				\
	} while (0)
109 
110 static CodeConvFunc conv_get_code_conv_func	(const gchar	*src_charset_str,
111 					 const gchar	*dest_charset_str);
112 
113 static gchar *conv_iconv_strdup_with_cd	(const gchar	*inbuf,
114 					 iconv_t	 cd);
115 
116 static gchar *conv_iconv_strdup		(const gchar	*inbuf,
117 					 const gchar	*src_code,
118 					 const gchar	*dest_code);
119 
120 static CharSet conv_get_locale_charset			(void);
121 static CharSet conv_get_outgoing_charset		(void);
122 static CharSet conv_guess_ja_encoding(const gchar *str);
123 static gboolean conv_is_ja_locale			(void);
124 
125 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
126 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
128 
129 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
130 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
131 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
132 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
133 
134 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
135 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
136 
137 static void conv_unreadable_8bit(gchar *str);
138 
139 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
140 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
141 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
142 
143 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
144 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
145 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
146 
147 static gboolean codeconv_strict_mode = FALSE;
148 static gboolean codeconv_allow_jisx0201_kana = FALSE;
149 static gboolean codeconv_broken_are_utf8 = FALSE;
150 
/*
 * Switch strict conversion mode on (TRUE) or off (FALSE).  The flag is
 * module-wide and stays in effect until the next call.
 */
void
codeconv_set_strict(gboolean mode)
{
	codeconv_strict_mode = mode;
}
155 
/*
 * Choose whether the JIS encoder may emit half-width (JIS X 0201) kana
 * as such (TRUE) or has to convert them to full-width kana (FALSE).
 */
void
codeconv_set_allow_jisx0201_kana(gboolean allow)
{
	codeconv_allow_jisx0201_kana = allow;
}
160 
/* Store the module-wide "broken are UTF-8" flag. */
void
codeconv_set_broken_are_utf8(gboolean are)
{
	codeconv_broken_are_utf8 = are;
}
165 
conv_jistoeuc(gchar * outbuf,gint outlen,const gchar * inbuf)166 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
167 {
168 	const guchar *in = inbuf;
169 	gchar *out = outbuf;
170 	JISState state = JIS_ASCII;
171 
172 	cm_return_val_if_fail(outbuf != NULL, 0);
173 
174 	/*
175 	 * Loop outputs up to 3 bytes in each pass (aux kanji) and we
176 	 * need 1 byte to terminate the output
177 	 */
178 	while (*in != '\0' && (out - outbuf) < outlen - 4) {
179 		if (*in == ESC) {
180 			in++;
181 			if (*in == '$') {
182 				if (*(in + 1) == '@' || *(in + 1) == 'B') {
183 					state = JIS_KANJI;
184 					in += 2;
185 				} else if (*(in + 1) == '(' &&
186 					   *(in + 2) == 'D') {
187 					state = JIS_AUXKANJI;
188 					in += 3;
189 				} else {
190 					/* unknown escape sequence */
191 					state = JIS_ASCII;
192 				}
193 			} else if (*in == '(') {
194 				if (*(in + 1) == 'B' || *(in + 1) == 'J') {
195 					state = JIS_ASCII;
196 					in += 2;
197 				} else if (*(in + 1) == 'I') {
198 					state = JIS_HWKANA;
199 					in += 2;
200 				} else {
201 					/* unknown escape sequence */
202 					state = JIS_ASCII;
203 				}
204 			} else {
205 				/* unknown escape sequence */
206 				state = JIS_ASCII;
207 			}
208 		} else if (*in == 0x0e) {
209 			state = JIS_HWKANA;
210 			in++;
211 		} else if (*in == 0x0f) {
212 			state = JIS_ASCII;
213 			in++;
214 		} else {
215 			switch (state) {
216 			case JIS_ASCII:
217 				*out++ = *in++;
218 				break;
219 			case JIS_KANJI:
220 				*out++ = *in++ | 0x80;
221 				if (*in == '\0') break;
222 				*out++ = *in++ | 0x80;
223 				break;
224 			case JIS_HWKANA:
225 				*out++ = 0x8e;
226 				*out++ = *in++ | 0x80;
227 				break;
228 			case JIS_AUXKANJI:
229 				*out++ = 0x8f;
230 				*out++ = *in++ | 0x80;
231 				if (*in == '\0') break;
232 				*out++ = *in++ | 0x80;
233 				break;
234 			}
235 		}
236 	}
237 
238 	*out = '\0';
239 	return 0;
240 }
241 
242 #define JIS_HWDAKUTEN		0x5e
243 #define JIS_HWHANDAKUTEN	0x5f
244 
conv_jis_hantozen(guchar * outbuf,guchar jis_code,guchar sound_sym)245 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
246 {
247 	static guint16 h2z_tbl[] = {
248 		/* 0x20 - 0x2f */
249 		0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
250 		0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
251 		/* 0x30 - 0x3f */
252 		0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
253 		0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
254 		/* 0x40 - 0x4f */
255 		0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
256 		0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
257 		/* 0x50 - 0x5f */
258 		0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
259 		0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
260 	};
261 
262 	static guint16 dakuten_tbl[] = {
263 		/* 0x30 - 0x3f */
264 		0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
265 		0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
266 		/* 0x40 - 0x4f */
267 		0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
268 		0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
269 	};
270 
271 	static guint16 handakuten_tbl[] = {
272 		/* 0x4a - 0x4e */
273 		0x2551, 0x2554, 0x2557, 0x255a, 0x255d
274 	};
275 
276 	guint16 out_code;
277 
278 	cm_return_val_if_fail(outbuf != NULL, 0);
279 
280 	jis_code &= 0x7f;
281 	sound_sym &= 0x7f;
282 
283 	if (jis_code < 0x21 || jis_code > 0x5f)
284 		return 0;
285 
286 	if (sound_sym == JIS_HWDAKUTEN &&
287 	    jis_code >= 0x36 && jis_code <= 0x4e) {
288 		out_code = dakuten_tbl[jis_code - 0x30];
289 		if (out_code != 0) {
290 			*outbuf = out_code >> 8;
291 			*(outbuf + 1) = out_code & 0xff;
292 			return 2;
293 		}
294 	}
295 
296 	if (sound_sym == JIS_HWHANDAKUTEN &&
297 	    jis_code >= 0x4a && jis_code <= 0x4e) {
298 		out_code = handakuten_tbl[jis_code - 0x4a];
299 		*outbuf = out_code >> 8;
300 		*(outbuf + 1) = out_code & 0xff;
301 		return 2;
302 	}
303 
304 	out_code = h2z_tbl[jis_code - 0x20];
305 	*outbuf = out_code >> 8;
306 	*(outbuf + 1) = out_code & 0xff;
307 	return 1;
308 }
309 
conv_euctojis(gchar * outbuf,gint outlen,const gchar * inbuf)310 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
311 {
312 	const guchar *in = inbuf;
313 	gchar *out = outbuf;
314 	JISState state = JIS_ASCII;
315 
316 	cm_return_val_if_fail(outbuf != NULL, 0);
317 
318 	/*
319 	 * Loop outputs up to 6 bytes in each pass (aux shift + aux
320 	 * kanji) and we need up to 4 bytes to terminate the output
321 	 * (ASCII shift + null)
322 	 */
323 	while (*in != '\0' && (out - outbuf) < outlen - 10) {
324 		if (IS_ASCII(*in)) {
325 			K_OUT();
326 			*out++ = *in++;
327 		} else if (iseuckanji(*in)) {
328 			if (iseuckanji(*(in + 1))) {
329 				K_IN();
330 				*out++ = *in++ & 0x7f;
331 				*out++ = *in++ & 0x7f;
332 			} else {
333 				K_OUT();
334 				*out++ = SUBST_CHAR;
335 				in++;
336 				if (*in != '\0' && !IS_ASCII(*in)) {
337 					*out++ = SUBST_CHAR;
338 					in++;
339 				}
340 			}
341 		} else if (iseuchwkana1(*in)) {
342 			if (iseuchwkana2(*(in + 1))) {
343 				if (codeconv_allow_jisx0201_kana) {
344 					HW_IN();
345 					in++;
346 					*out++ = *in++ & 0x7f;
347 				} else {
348 					guchar jis_ch[2];
349 					gint len;
350 
351 					if (iseuchwkana1(*(in + 2)) &&
352 					    iseuchwkana2(*(in + 3)))
353 						len = conv_jis_hantozen
354 							(jis_ch,
355 							 *(in + 1), *(in + 3));
356 					else
357 						len = conv_jis_hantozen
358 							(jis_ch,
359 							 *(in + 1), '\0');
360 					if (len == 0)
361 						in += 2;
362 					else {
363 						K_IN();
364 						in += len * 2;
365 						*out++ = jis_ch[0];
366 						*out++ = jis_ch[1];
367 					}
368 				}
369 			} else {
370 				K_OUT();
371 				in++;
372 				if (*in != '\0' && !IS_ASCII(*in)) {
373 					*out++ = SUBST_CHAR;
374 					in++;
375 				}
376 			}
377 		} else if (iseucaux(*in)) {
378 			in++;
379 			if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
380 				AUX_IN();
381 				*out++ = *in++ & 0x7f;
382 				*out++ = *in++ & 0x7f;
383 			} else {
384 				K_OUT();
385 				if (*in != '\0' && !IS_ASCII(*in)) {
386 					*out++ = SUBST_CHAR;
387 					in++;
388 					if (*in != '\0' && !IS_ASCII(*in)) {
389 						*out++ = SUBST_CHAR;
390 						in++;
391 					}
392 				}
393 			}
394 		} else {
395 			K_OUT();
396 			*out++ = SUBST_CHAR;
397 			in++;
398 		}
399 	}
400 
401 	K_OUT();
402 	*out = '\0';
403 	return 0;
404 }
405 
conv_sjistoeuc(gchar * outbuf,gint outlen,const gchar * inbuf)406 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
407 {
408 	const guchar *in = inbuf;
409 	gchar *out = outbuf;
410 
411 	cm_return_val_if_fail(outbuf != NULL, 0);
412 
413 	/*
414 	 * Loop outputs up to 2 bytes in each pass and we need 1 byte
415 	 * to terminate the output
416 	 */
417 	while (*in != '\0' && (out - outbuf) < outlen - 3) {
418 		if (IS_ASCII(*in)) {
419 			*out++ = *in++;
420 		} else if (issjiskanji1(*in)) {
421 			if (issjiskanji2(*(in + 1))) {
422 				guchar out1 = *in;
423 				guchar out2 = *(in + 1);
424 				guchar row;
425 
426 				row = out1 < 0xa0 ? 0x70 : 0xb0;
427 				if (out2 < 0x9f) {
428 					out1 = (out1 - row) * 2 - 1;
429 					out2 -= out2 > 0x7f ? 0x20 : 0x1f;
430 				} else {
431 					out1 = (out1 - row) * 2;
432 					out2 -= 0x7e;
433 				}
434 
435 				*out++ = out1 | 0x80;
436 				*out++ = out2 | 0x80;
437 				in += 2;
438 			} else {
439 				*out++ = SUBST_CHAR;
440 				in++;
441 				if (*in != '\0' && !IS_ASCII(*in)) {
442 					*out++ = SUBST_CHAR;
443 					in++;
444 				}
445 			}
446 		} else if (issjishwkana(*in)) {
447 			*out++ = 0x8e;
448 			*out++ = *in++;
449 		} else {
450 			*out++ = SUBST_CHAR;
451 			in++;
452 		}
453 	}
454 
455 	*out = '\0';
456 	return 0;
457 }
458 
conv_jistoutf8(gchar * outbuf,gint outlen,const gchar * inbuf)459 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
460 {
461 	gchar *eucstr;
462 
463 	cm_return_val_if_fail(inbuf != NULL, 0);
464 	cm_return_val_if_fail(outbuf != NULL, 0);
465 
466 	Xalloca(eucstr, outlen, return -1);
467 
468 	if (conv_jistoeuc(eucstr, outlen, inbuf) <0)
469 		return -1;
470 	if (conv_euctoutf8(outbuf, outlen, eucstr) < 0)
471 		return -1;
472 	return 0;
473 }
474 
conv_sjistoutf8(gchar * outbuf,gint outlen,const gchar * inbuf)475 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
476 {
477 	gchar *tmpstr;
478 
479 	cm_return_val_if_fail(inbuf != NULL, 0);
480 	cm_return_val_if_fail(outbuf != NULL, 0);
481 
482 	tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
483 	if (tmpstr) {
484 		strncpy2(outbuf, tmpstr, outlen);
485 		g_free(tmpstr);
486 		return 0;
487 	} else {
488 		strncpy2(outbuf, inbuf, outlen);
489 		return -1;
490 	}
491 }
492 
conv_euctoutf8(gchar * outbuf,gint outlen,const gchar * inbuf)493 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
494 {
495 	static iconv_t cd = (iconv_t)-1;
496 	static gboolean iconv_ok = TRUE;
497 	gchar *tmpstr;
498 
499 	cm_return_val_if_fail(inbuf != NULL, 0);
500 	cm_return_val_if_fail(outbuf != NULL, 0);
501 
502 	if (cd == (iconv_t)-1) {
503 		if (!iconv_ok) {
504 			strncpy2(outbuf, inbuf, outlen);
505 			return -1;
506 		}
507 		cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
508 		if (cd == (iconv_t)-1) {
509 			cd = iconv_open(CS_UTF_8, CS_EUC_JP);
510 			if (cd == (iconv_t)-1) {
511 				g_warning("conv_euctoutf8(): %s",
512 					  g_strerror(errno));
513 				iconv_ok = FALSE;
514 				strncpy2(outbuf, inbuf, outlen);
515 				return -1;
516 			}
517 		}
518 	}
519 
520 	tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
521 	if (tmpstr) {
522 		strncpy2(outbuf, tmpstr, outlen);
523 		g_free(tmpstr);
524 		return 0;
525 	} else {
526 		strncpy2(outbuf, inbuf, outlen);
527 		return -1;
528 	}
529 }
530 
conv_anytoutf8(gchar * outbuf,gint outlen,const gchar * inbuf)531 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
532 {
533 	gint r = -1;
534 
535 	cm_return_val_if_fail(inbuf != NULL, 0);
536 	cm_return_val_if_fail(outbuf != NULL, 0);
537 
538 	switch (conv_guess_ja_encoding(inbuf)) {
539 	case C_ISO_2022_JP:
540 		r = conv_jistoutf8(outbuf, outlen, inbuf);
541 		break;
542 	case C_SHIFT_JIS:
543 		r = conv_sjistoutf8(outbuf, outlen, inbuf);
544 		break;
545 	case C_EUC_JP:
546 		r = conv_euctoutf8(outbuf, outlen, inbuf);
547 		break;
548 	default:
549 		r = 0;
550 		strncpy2(outbuf, inbuf, outlen);
551 		break;
552 	}
553 
554 	return r;
555 }
556 
conv_utf8toeuc(gchar * outbuf,gint outlen,const gchar * inbuf)557 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
558 {
559 	static iconv_t cd = (iconv_t)-1;
560 	static gboolean iconv_ok = TRUE;
561 	gchar *tmpstr;
562 
563 	cm_return_val_if_fail(inbuf != NULL, 0);
564 	cm_return_val_if_fail(outbuf != NULL, 0);
565 
566 	if (cd == (iconv_t)-1) {
567 		if (!iconv_ok) {
568 			strncpy2(outbuf, inbuf, outlen);
569 			return -1;
570 		}
571 		cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
572 		if (cd == (iconv_t)-1) {
573 			cd = iconv_open(CS_EUC_JP, CS_UTF_8);
574 			if (cd == (iconv_t)-1) {
575 				g_warning("conv_utf8toeuc(): %s",
576 					  g_strerror(errno));
577 				iconv_ok = FALSE;
578 				strncpy2(outbuf, inbuf, outlen);
579 				return -1;
580 			}
581 		}
582 	}
583 
584 	tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
585 	if (tmpstr) {
586 		strncpy2(outbuf, tmpstr, outlen);
587 		g_free(tmpstr);
588 		return 0;
589 	} else {
590 		strncpy2(outbuf, inbuf, outlen);
591 		return -1;
592 	}
593 }
594 
conv_utf8tojis(gchar * outbuf,gint outlen,const gchar * inbuf)595 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
596 {
597 	gchar *eucstr;
598 
599 	cm_return_val_if_fail(inbuf != NULL, 0);
600 	cm_return_val_if_fail(outbuf != NULL, 0);
601 
602 	Xalloca(eucstr, outlen, return -1);
603 
604 	if (conv_utf8toeuc(eucstr, outlen, inbuf) < 0)
605 		return -1;
606 	if (conv_euctojis(outbuf, outlen, eucstr) < 0)
607 		return -1;
608 
609 	return 0;
610 }
611 
/*
 * Make a string safe for display, in place: every CR that is directly
 * followed by LF is removed, and every non-ASCII byte is replaced by
 * SUBST_CHAR.  The result is never longer than the input.
 *
 * The string is compacted in a single pass with a read and a write
 * pointer, so the cost stays linear however many CR+LF pairs it holds.
 */
static void conv_unreadable_8bit(gchar *str)
{
	const guchar *src;
	guchar *dst;

	cm_return_if_fail(str != NULL);

	src = (const guchar *)str;
	dst = (guchar *)str;

	while (*src != '\0') {
		if (src[0] == '\r' && src[1] == '\n') {
			/* drop the CR; the LF is copied on the next pass */
			src++;
			continue;
		}
		if (IS_ASCII(*src))
			*dst = *src;
		else
			*dst = SUBST_CHAR;
		dst++;
		src++;
	}

	*dst = '\0';
}
624 
conv_guess_ja_encoding(const gchar * str)625 static CharSet conv_guess_ja_encoding(const gchar *str)
626 {
627 	const guchar *p = str;
628 	CharSet guessed = C_US_ASCII;
629 
630 	while (*p != '\0') {
631 		if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
632 			if (guessed == C_US_ASCII)
633 				return C_ISO_2022_JP;
634 			p += 2;
635 		} else if (IS_ASCII(*p)) {
636 			p++;
637 		} else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
638 			if (*p >= 0xfd && *p <= 0xfe)
639 				return C_EUC_JP;
640 			else if (guessed == C_SHIFT_JIS) {
641 				if ((issjiskanji1(*p) &&
642 				     issjiskanji2(*(p + 1))) ||
643 				    issjishwkana(*p))
644 					guessed = C_SHIFT_JIS;
645 				else
646 					guessed = C_EUC_JP;
647 			} else
648 				guessed = C_EUC_JP;
649 			p += 2;
650 		} else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
651 			if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
652 				guessed = C_SHIFT_JIS;
653 			else
654 				return C_SHIFT_JIS;
655 			p += 2;
656 		} else if (issjishwkana(*p)) {
657 			guessed = C_SHIFT_JIS;
658 			p++;
659 		} else {
660 			p++;
661 		}
662 	}
663 
664 	return guessed;
665 }
666 
conv_jistodisp(gchar * outbuf,gint outlen,const gchar * inbuf)667 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
668 {
669 	cm_return_val_if_fail(inbuf != NULL, 0);
670 	cm_return_val_if_fail(outbuf != NULL, 0);
671 
672 	return conv_jistoutf8(outbuf, outlen, inbuf);
673 }
674 
conv_sjistodisp(gchar * outbuf,gint outlen,const gchar * inbuf)675 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
676 {
677 	cm_return_val_if_fail(inbuf != NULL, 0);
678 	cm_return_val_if_fail(outbuf != NULL, 0);
679 
680 	return conv_sjistoutf8(outbuf, outlen, inbuf);
681 }
682 
conv_euctodisp(gchar * outbuf,gint outlen,const gchar * inbuf)683 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
684 {
685 	cm_return_val_if_fail(inbuf != NULL, 0);
686 	cm_return_val_if_fail(outbuf != NULL, 0);
687 
688 	return conv_euctoutf8(outbuf, outlen, inbuf);
689 }
690 
/*
 * Prepare supposedly UTF-8 text for display.  Valid UTF-8 is copied
 * as is; anything else goes through conv_ustodisp(), which replaces the
 * non-ASCII bytes.
 */
void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	cm_return_if_fail(inbuf != NULL);
	cm_return_if_fail(outbuf != NULL);

	if (g_utf8_validate(inbuf, -1, NULL) != TRUE) {
		conv_ustodisp(outbuf, outlen, inbuf);
		return;
	}

	strncpy2(outbuf, inbuf, outlen);
}
701 
conv_anytodisp(gchar * outbuf,gint outlen,const gchar * inbuf)702 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
703 {
704 	gint r = 0;
705 
706 	cm_return_val_if_fail(inbuf != NULL, 0);
707 	cm_return_val_if_fail(outbuf != NULL, 0);
708 
709 	if (conv_anytoutf8(outbuf, outlen, inbuf) < 0)
710 		r = -1;
711 	if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
712 		conv_unreadable_8bit(outbuf);
713 	return r;
714 }
715 
conv_ustodisp(gchar * outbuf,gint outlen,const gchar * inbuf)716 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
717 {
718 	cm_return_val_if_fail(inbuf != NULL, 0);
719 	cm_return_val_if_fail(outbuf != NULL, 0);
720 
721 	strncpy2(outbuf, inbuf, outlen);
722 	conv_unreadable_8bit(outbuf);
723 
724 	return 0;
725 }
726 
/*
 * Convert text in the locale's charset for display.
 *
 * A strict conversion from conv_get_locale_charset_str() is tried first.
 * If it produces something that is not valid UTF-8, a second strict
 * attempt is made from conv_get_locale_charset_str_no_utf8().  If no
 * valid UTF-8 comes out, the input is handed to conv_utf8todisp().
 *
 * Strict mode is switched on for each attempt and switched off
 * afterwards; the previous setting is not restored.
 */
void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
	gchar *converted;

	cm_return_if_fail(inbuf != NULL);
	cm_return_if_fail(outbuf != NULL);

	codeconv_set_strict(TRUE);
	converted = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
				      CS_INTERNAL);
	codeconv_set_strict(FALSE);

	if (converted != NULL && !g_utf8_validate(converted, -1, NULL)) {
		/* converted, but not to valid UTF-8: try the other charset */
		g_free(converted);
		codeconv_set_strict(TRUE);
		converted = conv_iconv_strdup(inbuf,
				conv_get_locale_charset_str_no_utf8(),
				CS_INTERNAL);
		codeconv_set_strict(FALSE);
	}

	if (converted != NULL && g_utf8_validate(converted, -1, NULL)) {
		strncpy2(outbuf, converted, outlen);
		g_free(converted);
		return;
	}

	g_free(converted);
	conv_utf8todisp(outbuf, outlen, inbuf);
}
759 
conv_noconv(gchar * outbuf,gint outlen,const gchar * inbuf)760 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
761 {
762 	cm_return_val_if_fail(inbuf != NULL, 0);
763 	cm_return_val_if_fail(outbuf != NULL, 0);
764 
765 	strncpy2(outbuf, inbuf, outlen);
766 	return 0;
767 }
768 
769 static const gchar *
conv_get_fallback_for_private_encoding(const gchar * encoding)770 conv_get_fallback_for_private_encoding(const gchar *encoding)
771 {
772 	if (encoding) {
773 		if ((encoding[0] == 'X' || encoding[0] == 'x') &&
774 		    encoding[1] == '-') {
775 			if (!g_ascii_strcasecmp(encoding, CS_X_MACCYR))
776 				return CS_MACCYR;
777 			if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
778 				return CS_GBK;
779 		}
780 		else if(!g_ascii_strcasecmp(encoding, CS_ISO_8859_8_I)) {
781 			/*
782 			 * ISO-8859-8-I is a variant which fully
783 			 * agrees with ISO-8859-8 on character
784 			 * codings, and differs only in directionality
785 			 * implications, which are ignored here
786 			 * anyway; and is not recognized by iconv
787 			 */
788 			return CS_ISO_8859_8;
789 		}
790 	}
791 
792 	return encoding;
793 }
794 
conv_code_converter_new(const gchar * src_charset)795 CodeConverter *conv_code_converter_new(const gchar *src_charset)
796 {
797 	CodeConverter *conv;
798 
799 	src_charset = conv_get_fallback_for_private_encoding(src_charset);
800 
801 	conv = g_new0(CodeConverter, 1);
802 	conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
803 	conv->charset_str = g_strdup(src_charset);
804 	conv->charset = conv_get_charset_from_str(src_charset);
805 
806 	return conv;
807 }
808 
/*
 * Free a converter created by conv_code_converter_new(), including its
 * copy of the charset name.  Passing NULL is allowed and does nothing.
 */
void conv_code_converter_destroy(CodeConverter *conv)
{
	if (conv == NULL)
		return;

	g_free(conv->charset_str);
	g_free(conv);
}
814 
/*
 * Convert inbuf with the given converter and store the result in outbuf
 * (at most outlen bytes).
 *
 * If the converter has a dedicated conversion function, that function is
 * called and its result returned.  Otherwise the text is converted with
 * iconv from the converter's charset.
 *
 * Returns 0 on success and -1 on failure or invalid arguments.
 */
gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
		  const gchar *inbuf)
{
	gchar *str;

	cm_return_val_if_fail(conv != NULL, -1);
	cm_return_val_if_fail(inbuf != NULL, -1);
	cm_return_val_if_fail(outbuf != NULL, -1);

	if (conv->code_conv_func != conv_noconv)
		return conv->code_conv_func(outbuf, outlen, inbuf);

	/* no dedicated converter for this charset: let iconv do it */
	str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
	if (str == NULL)
		return -1;

	strncpy2(outbuf, str, outlen);
	g_free(str);

	return 0;
}
837 
conv_codeset_strdup(const gchar * inbuf,const gchar * src_code,const gchar * dest_code)838 gchar *conv_codeset_strdup(const gchar *inbuf,
839 			   const gchar *src_code, const gchar *dest_code)
840 {
841 	gchar *buf;
842 	size_t len;
843 	CodeConvFunc conv_func;
844 
845 	cm_return_val_if_fail(inbuf != NULL, NULL);
846 
847 	if (!g_strcmp0(src_code, dest_code)) {
848 		CharSet dest_charset = conv_get_charset_from_str(dest_code);
849 		if (codeconv_strict_mode && dest_charset == C_UTF_8) {
850 			/* ensure valid UTF-8 if target is UTF-8 */
851 			if (!g_utf8_validate(inbuf, -1, NULL)) {
852 				return NULL;
853 			}
854 		}
855 		/* otherwise, try for a lucky day */
856 		return g_strdup(inbuf);
857 	}
858 
859 	src_code = conv_get_fallback_for_private_encoding(src_code);
860 	conv_func = conv_get_code_conv_func(src_code, dest_code);
861 	if (conv_func == conv_ustodisp
862 			&& codeconv_strict_mode
863 			&& !is_ascii_str(inbuf))
864 		return NULL;
865 
866 	if (conv_func != conv_noconv) {
867 		len = (strlen(inbuf) + 1) * 3;
868 		buf = g_malloc(len);
869 
870 		if (conv_func(buf, len, inbuf) == 0 || !codeconv_strict_mode)
871 			return g_realloc(buf, strlen(buf) + 1);
872 		else {
873 			g_free(buf);
874 			return NULL;
875 		}
876 	}
877 
878 	return conv_iconv_strdup(inbuf, src_code, dest_code);
879 }
880 
conv_get_code_conv_func(const gchar * src_charset_str,const gchar * dest_charset_str)881 static CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
882 				     const gchar *dest_charset_str)
883 {
884 	CodeConvFunc code_conv = conv_noconv;
885 	CharSet src_charset;
886 	CharSet dest_charset;
887 
888 	if (!src_charset_str)
889 		src_charset = conv_get_locale_charset();
890 	else
891 		src_charset = conv_get_charset_from_str(src_charset_str);
892 
893 	/* auto detection mode */
894 	if (!src_charset_str && !dest_charset_str) {
895 		if (conv_is_ja_locale())
896 			return conv_anytodisp;
897 		else
898 			return conv_noconv;
899 	}
900 
901 	dest_charset = conv_get_charset_from_str(dest_charset_str);
902 
903 	if (dest_charset == C_US_ASCII)
904 		return conv_ustodisp;
905 
906 	switch (src_charset) {
907 	case C_US_ASCII:
908 	case C_ISO_8859_1:
909 	case C_ISO_8859_2:
910 	case C_ISO_8859_3:
911 	case C_ISO_8859_4:
912 	case C_ISO_8859_5:
913 	case C_ISO_8859_6:
914 	case C_ISO_8859_7:
915 	case C_ISO_8859_8:
916 	case C_ISO_8859_9:
917 	case C_ISO_8859_10:
918 	case C_ISO_8859_11:
919 	case C_ISO_8859_13:
920 	case C_ISO_8859_14:
921 	case C_ISO_8859_15:
922 		break;
923 	case C_ISO_2022_JP:
924 	case C_ISO_2022_JP_2:
925 	case C_ISO_2022_JP_3:
926 		if (dest_charset == C_AUTO)
927 			code_conv = conv_jistodisp;
928 		else if (dest_charset == C_EUC_JP)
929 			code_conv = conv_jistoeuc;
930 		else if (dest_charset == C_UTF_8)
931 			code_conv = conv_jistoutf8;
932 		break;
933 	case C_SHIFT_JIS:
934 		if (dest_charset == C_AUTO)
935 			code_conv = conv_sjistodisp;
936 		else if (dest_charset == C_EUC_JP)
937 			code_conv = conv_sjistoeuc;
938 		else if (dest_charset == C_UTF_8)
939 			code_conv = conv_sjistoutf8;
940 		break;
941 	case C_EUC_JP:
942 		if (dest_charset == C_AUTO)
943 			code_conv = conv_euctodisp;
944 		else if (dest_charset == C_ISO_2022_JP   ||
945 			 dest_charset == C_ISO_2022_JP_2 ||
946 			 dest_charset == C_ISO_2022_JP_3)
947 			code_conv = conv_euctojis;
948 		else if (dest_charset == C_UTF_8)
949 			code_conv = conv_euctoutf8;
950 		break;
951 	case C_UTF_8:
952 		if (dest_charset == C_EUC_JP)
953 			code_conv = conv_utf8toeuc;
954 		else if (dest_charset == C_ISO_2022_JP   ||
955 			 dest_charset == C_ISO_2022_JP_2 ||
956 			 dest_charset == C_ISO_2022_JP_3)
957 			code_conv = conv_utf8tojis;
958 		break;
959 	default:
960 		break;
961 	}
962 
963 	return code_conv;
964 }
965 
conv_iconv_strdup(const gchar * inbuf,const gchar * src_code,const gchar * dest_code)966 static gchar *conv_iconv_strdup(const gchar *inbuf,
967 			 const gchar *src_code, const gchar *dest_code)
968 {
969 	iconv_t cd;
970 	gchar *outbuf;
971 
972 	cm_return_val_if_fail(inbuf != NULL, NULL);
973 
974 	if (!src_code && !dest_code &&
975 	    g_utf8_validate(inbuf, -1, NULL))
976 	    	return g_strdup(inbuf);
977 
978 	if (!src_code)
979 		src_code = conv_get_outgoing_charset_str();
980 	if (!dest_code)
981 		dest_code = CS_INTERNAL;
982 
983 	/* don't convert if src and dest codeset are identical */
984 	if (!strcasecmp(src_code, dest_code))
985 		return g_strdup(inbuf);
986 
987 	/* don't convert if dest codeset is US-ASCII */
988 	if (!strcasecmp(src_code, CS_US_ASCII))
989 		return g_strdup(inbuf);
990 
991 	/* don't convert if dest codeset is US-ASCII */
992 	if (!strcasecmp(dest_code, CS_US_ASCII))
993 		return g_strdup(inbuf);
994 
995 	cd = iconv_open(dest_code, src_code);
996 	if (cd == (iconv_t)-1)
997 		return NULL;
998 
999 	outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
1000 
1001 	iconv_close(cd);
1002 
1003 	return outbuf;
1004 }
1005 
conv_iconv_strdup_with_cd(const gchar * inbuf,iconv_t cd)1006 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
1007 {
1008 	const gchar *inbuf_p;
1009 	gchar *outbuf;
1010 	gchar *outbuf_p;
1011 	size_t in_size;
1012 	size_t in_left;
1013 	size_t out_size;
1014 	size_t out_left;
1015 	size_t n_conv;
1016 	size_t len;
1017 
1018 	cm_return_val_if_fail(inbuf != NULL, NULL);
1019 
1020 	inbuf_p = inbuf;
1021 	in_size = strlen(inbuf);
1022 	in_left = in_size;
1023 	out_size = (in_size + 1) * 2;
1024 	outbuf = g_malloc(out_size);
1025 	outbuf_p = outbuf;
1026 	out_left = out_size;
1027 
1028 #define EXPAND_BUF()				\
1029 {						\
1030 	len = outbuf_p - outbuf;		\
1031 	out_size *= 2;				\
1032 	outbuf = g_realloc(outbuf, out_size);	\
1033 	outbuf_p = outbuf + len;		\
1034 	out_left = out_size - len;		\
1035 }
1036 
1037 	while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1038 			       &outbuf_p, &out_left)) == (size_t)-1) {
1039 		if (EILSEQ == errno) {
1040 			if (codeconv_strict_mode) {
1041 				g_free(outbuf);
1042 				return NULL;
1043 			}
1044 			//g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1045 			inbuf_p++;
1046 			in_left--;
1047 			if (out_left == 0) {
1048 				EXPAND_BUF();
1049 			}
1050 			*outbuf_p++ = SUBST_CHAR;
1051 			out_left--;
1052 		} else if (EINVAL == errno) {
1053 			break;
1054 		} else if (E2BIG == errno) {
1055 			EXPAND_BUF();
1056 		} else {
1057 			g_warning("conv_iconv_strdup(): %s",
1058 				  g_strerror(errno));
1059 			break;
1060 		}
1061 	}
1062 
1063 	while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1064 	       (size_t)-1) {
1065 		if (E2BIG == errno) {
1066 			EXPAND_BUF();
1067 		} else {
1068 			g_warning("conv_iconv_strdup(): %s",
1069 				  g_strerror(errno));
1070 			break;
1071 		}
1072 	}
1073 
1074 #undef EXPAND_BUF
1075 
1076 	len = outbuf_p - outbuf;
1077 	outbuf = g_realloc(outbuf, len + 1);
1078 	outbuf[len] = '\0';
1079 
1080 	return outbuf;
1081 }
1082 
/*
 * Table pairing each CharSet value with a charset name string.
 *
 * A CharSet may appear several times with different names (aliases).
 * conv_get_charset_from_str_table() registers every name, while
 * conv_get_charset_to_str_table() keeps only the first name listed for a
 * given CharSet, so the order of aliases here decides which name is
 * reported for a CharSet.
 */
1083 static const struct {
1084 	CharSet charset;
1085 	gchar *const name;
1086 } charsets[] = {
1087 	{C_US_ASCII,		CS_US_ASCII},
1088 	{C_US_ASCII,		CS_ANSI_X3_4_1968},
1089 	{C_UTF_8,		CS_UTF_8},
1090 	{C_UTF_7,		CS_UTF_7},
1091 	{C_ISO_8859_1,		CS_ISO_8859_1},
1092 	{C_ISO_8859_2,		CS_ISO_8859_2},
1093 	{C_ISO_8859_3,		CS_ISO_8859_3},
1094 	{C_ISO_8859_4,		CS_ISO_8859_4},
1095 	{C_ISO_8859_5,		CS_ISO_8859_5},
1096 	{C_ISO_8859_6,		CS_ISO_8859_6},
1097 	{C_ISO_8859_7,		CS_ISO_8859_7},
1098 	{C_ISO_8859_8,		CS_ISO_8859_8},
1099 	{C_ISO_8859_9,		CS_ISO_8859_9},
1100 	{C_ISO_8859_10,		CS_ISO_8859_10},
1101 	{C_ISO_8859_11,		CS_ISO_8859_11},
1102 	{C_ISO_8859_13,		CS_ISO_8859_13},
1103 	{C_ISO_8859_14,		CS_ISO_8859_14},
1104 	{C_ISO_8859_15,		CS_ISO_8859_15},
1105 	{C_BALTIC,		CS_BALTIC},
1106 	{C_CP1250,		CS_CP1250},
1107 	{C_CP1251,		CS_CP1251},
1108 	{C_CP1252,		CS_CP1252},
1109 	{C_CP1253,		CS_CP1253},
1110 	{C_CP1254,		CS_CP1254},
1111 	{C_CP1255,		CS_CP1255},
1112 	{C_CP1256,		CS_CP1256},
1113 	{C_CP1257,		CS_CP1257},
1114 	{C_CP1258,		CS_CP1258},
1115 	{C_WINDOWS_1250,	CS_WINDOWS_1250},
1116 	{C_WINDOWS_1251,	CS_WINDOWS_1251},
1117 	{C_WINDOWS_1252,	CS_WINDOWS_1252},
1118 	{C_WINDOWS_1253,	CS_WINDOWS_1253},
1119 	{C_WINDOWS_1254,	CS_WINDOWS_1254},
1120 	{C_WINDOWS_1255,	CS_WINDOWS_1255},
1121 	{C_WINDOWS_1256,	CS_WINDOWS_1256},
1122 	{C_WINDOWS_1257,	CS_WINDOWS_1257},
1123 	{C_WINDOWS_1258,	CS_WINDOWS_1258},
1124 	{C_KOI8_R,		CS_KOI8_R},
1125 	{C_MACCYR,		CS_MACCYR},
1126 	{C_KOI8_T,		CS_KOI8_T},
1127 	{C_KOI8_U,		CS_KOI8_U},
1128 	{C_ISO_2022_JP,		CS_ISO_2022_JP},
1129 	{C_ISO_2022_JP_2,	CS_ISO_2022_JP_2},
1130 	{C_ISO_2022_JP_3,	CS_ISO_2022_JP_3},
1131 	{C_EUC_JP,		CS_EUC_JP},
1132 	{C_EUC_JP,		CS_EUCJP},
1133 	{C_EUC_JP_MS,		CS_EUC_JP_MS},
1134 	{C_SHIFT_JIS,		CS_SHIFT_JIS},
1135 	{C_SHIFT_JIS,		CS_SHIFT__JIS},
1136 	{C_SHIFT_JIS,		CS_SJIS},
1137 	{C_ISO_2022_KR,		CS_ISO_2022_KR},
1138 	{C_EUC_KR,		CS_EUC_KR},
1139 	{C_ISO_2022_CN,		CS_ISO_2022_CN},
1140 	{C_EUC_CN,		CS_EUC_CN},
1141 	{C_GB18030,		CS_GB18030},
1142 	{C_GB2312,		CS_GB2312},
1143 	{C_GBK,			CS_GBK},
1144 	{C_EUC_TW,		CS_EUC_TW},
1145 	{C_BIG5,		CS_BIG5},
1146 	{C_BIG5_HKSCS,		CS_BIG5_HKSCS},
1147 	{C_TIS_620,		CS_TIS_620},
1148 	{C_WINDOWS_874,		CS_WINDOWS_874},
1149 	{C_GEORGIAN_PS,		CS_GEORGIAN_PS},
1150 	{C_TCVN5712_1,		CS_TCVN5712_1},
1151 };
1152 
/*
 * Locale name -> charset mapping used by the locale lookups in this file.
 *
 *   locale       locale name (or name prefix) to compare against
 *   charset      value returned by conv_get_locale_charset() and
 *                conv_get_locale_charset_no_utf8()
 *   out_charset  value returned by conv_get_outgoing_charset()
 *
 * The lookups scan the table from the top and stop at the first entry
 * whose locale string is a case-insensitive prefix of the current locale,
 * so entries carrying a codeset suffix ("ja_JP.eucJP") must stay above
 * the bare "ll_CC" entry for the same locale ("ja_JP").
 */
1153 static const struct {
1154 	gchar *const locale;
1155 	CharSet charset;
1156 	CharSet out_charset;
1157 } locale_table[] = {
1158 	{"ja_JP.eucJP"		, C_EUC_JP	, C_ISO_2022_JP},
1159 	{"ja_JP.EUC-JP"		, C_EUC_JP	, C_ISO_2022_JP},
1160 	{"ja_JP.EUC"		, C_EUC_JP	, C_ISO_2022_JP},
1161 	{"ja_JP.ujis"		, C_EUC_JP	, C_ISO_2022_JP},
1162 	{"ja_JP.SJIS"		, C_SHIFT_JIS	, C_ISO_2022_JP},
1163 	{"ja_JP.JIS"		, C_ISO_2022_JP	, C_ISO_2022_JP},
1164 #ifdef G_OS_WIN32
1165 	{"ja_JP"		, C_SHIFT_JIS	, C_ISO_2022_JP},
1166 #else
1167 	{"ja_JP"		, C_EUC_JP	, C_ISO_2022_JP},
1168 #endif
1169 	{"ko_KR.EUC-KR"		, C_EUC_KR	, C_EUC_KR},
1170 	{"ko_KR"		, C_EUC_KR	, C_EUC_KR},
1171 	{"zh_CN.GB18030"	, C_GB18030	, C_GB18030},
1172 	{"zh_CN.GB2312"		, C_GB2312	, C_GB2312},
1173 	{"zh_CN.GBK"		, C_GBK		, C_GBK},
1174 	{"zh_CN"		, C_GB18030	, C_GB18030},
1175 	{"zh_HK"		, C_BIG5_HKSCS	, C_BIG5_HKSCS},
1176 	{"zh_TW.eucTW"		, C_EUC_TW	, C_BIG5},
1177 	{"zh_TW.EUC-TW"		, C_EUC_TW	, C_BIG5},
1178 	{"zh_TW.Big5"		, C_BIG5	, C_BIG5},
1179 	{"zh_TW"		, C_BIG5	, C_BIG5},
1180 
1181 	{"ru_RU.KOI8-R"		, C_KOI8_R	, C_KOI8_R},
1182 	{"ru_RU.KOI8R"		, C_KOI8_R	, C_KOI8_R},
1183 	{"ru_RU.CP1251"		, C_WINDOWS_1251, C_KOI8_R},
1184 #ifdef G_OS_WIN32
1185 	{"ru_RU"		, C_WINDOWS_1251, C_KOI8_R},
1186 #else
1187 	{"ru_RU"		, C_ISO_8859_5	, C_KOI8_R},
1188 #endif
1189 	{"tg_TJ"		, C_KOI8_T	, C_KOI8_T},
1190 	{"ru_UA"		, C_KOI8_U	, C_KOI8_U},
1191 	{"uk_UA.CP1251"		, C_WINDOWS_1251, C_KOI8_U},
1192 	{"uk_UA"		, C_KOI8_U	, C_KOI8_U},
1193 
1194 	{"be_BY"		, C_WINDOWS_1251, C_WINDOWS_1251},
1195 	{"bg_BG"		, C_WINDOWS_1251, C_WINDOWS_1251},
1196 
1197 	{"yi_US"		, C_WINDOWS_1255, C_WINDOWS_1255},
1198 
1199 	{"af_ZA"		, C_ISO_8859_1  , C_ISO_8859_1},
1200 	{"br_FR"		, C_ISO_8859_1	, C_ISO_8859_1},
1201 	{"ca_ES"		, C_ISO_8859_1	, C_ISO_8859_1},
1202 	{"da_DK"		, C_ISO_8859_1	, C_ISO_8859_1},
1203 	{"de_AT"		, C_ISO_8859_1	, C_ISO_8859_1},
1204 	{"de_BE"		, C_ISO_8859_1	, C_ISO_8859_1},
1205 	{"de_CH"		, C_ISO_8859_1	, C_ISO_8859_1},
1206 	{"de_DE"		, C_ISO_8859_1	, C_ISO_8859_1},
1207 	{"de_LU"		, C_ISO_8859_1	, C_ISO_8859_1},
1208 	{"en_AU"		, C_ISO_8859_1	, C_ISO_8859_1},
1209 	{"en_BW"		, C_ISO_8859_1	, C_ISO_8859_1},
1210 	{"en_CA"		, C_ISO_8859_1	, C_ISO_8859_1},
1211 	{"en_DK"		, C_ISO_8859_1	, C_ISO_8859_1},
1212 	{"en_GB"		, C_ISO_8859_1	, C_ISO_8859_1},
1213 	{"en_HK"		, C_ISO_8859_1	, C_ISO_8859_1},
1214 	{"en_IE"		, C_ISO_8859_1	, C_ISO_8859_1},
1215 	{"en_NZ"		, C_ISO_8859_1	, C_ISO_8859_1},
1216 	{"en_PH"		, C_ISO_8859_1	, C_ISO_8859_1},
1217 	{"en_SG"		, C_ISO_8859_1	, C_ISO_8859_1},
1218 	{"en_US"		, C_ISO_8859_1	, C_ISO_8859_1},
1219 	{"en_ZA"		, C_ISO_8859_1	, C_ISO_8859_1},
1220 	{"en_ZW"		, C_ISO_8859_1	, C_ISO_8859_1},
1221 	{"es_AR"		, C_ISO_8859_1	, C_ISO_8859_1},
1222 	{"es_BO"		, C_ISO_8859_1	, C_ISO_8859_1},
1223 	{"es_CL"		, C_ISO_8859_1	, C_ISO_8859_1},
1224 	{"es_CO"		, C_ISO_8859_1	, C_ISO_8859_1},
1225 	{"es_CR"		, C_ISO_8859_1	, C_ISO_8859_1},
1226 	{"es_DO"		, C_ISO_8859_1	, C_ISO_8859_1},
1227 	{"es_EC"		, C_ISO_8859_1	, C_ISO_8859_1},
1228 	{"es_ES"		, C_ISO_8859_1	, C_ISO_8859_1},
1229 	{"es_GT"		, C_ISO_8859_1	, C_ISO_8859_1},
1230 	{"es_HN"		, C_ISO_8859_1	, C_ISO_8859_1},
1231 	{"es_MX"		, C_ISO_8859_1	, C_ISO_8859_1},
1232 	{"es_NI"		, C_ISO_8859_1	, C_ISO_8859_1},
1233 	{"es_PA"		, C_ISO_8859_1	, C_ISO_8859_1},
1234 	{"es_PE"		, C_ISO_8859_1	, C_ISO_8859_1},
1235 	{"es_PR"		, C_ISO_8859_1	, C_ISO_8859_1},
1236 	{"es_PY"		, C_ISO_8859_1	, C_ISO_8859_1},
1237 	{"es_SV"		, C_ISO_8859_1	, C_ISO_8859_1},
1238 	{"es_US"		, C_ISO_8859_1	, C_ISO_8859_1},
1239 	{"es_UY"		, C_ISO_8859_1	, C_ISO_8859_1},
1240 	{"es_VE"		, C_ISO_8859_1	, C_ISO_8859_1},
1241 	{"et_EE"		, C_ISO_8859_1	, C_ISO_8859_1},
1242 	{"eu_ES"		, C_ISO_8859_1	, C_ISO_8859_1},
1243 	{"fi_FI"		, C_ISO_8859_1	, C_ISO_8859_1},
1244 	{"fo_FO"		, C_ISO_8859_1	, C_ISO_8859_1},
1245 	{"fr_BE"		, C_ISO_8859_1	, C_ISO_8859_1},
1246 	{"fr_CA"		, C_ISO_8859_1	, C_ISO_8859_1},
1247 	{"fr_CH"		, C_ISO_8859_1	, C_ISO_8859_1},
1248 	{"fr_FR"		, C_ISO_8859_1	, C_ISO_8859_1},
1249 	{"fr_LU"		, C_ISO_8859_1	, C_ISO_8859_1},
1250 	{"ga_IE"		, C_ISO_8859_1	, C_ISO_8859_1},
1251 	{"gl_ES"		, C_ISO_8859_1	, C_ISO_8859_1},
1252 	{"gv_GB"		, C_ISO_8859_1	, C_ISO_8859_1},
1253 	{"id_ID"		, C_ISO_8859_1	, C_ISO_8859_1},
1254 	{"is_IS"		, C_ISO_8859_1	, C_ISO_8859_1},
1255 	{"it_CH"		, C_ISO_8859_1	, C_ISO_8859_1},
1256 	{"it_IT"		, C_ISO_8859_1	, C_ISO_8859_1},
1257 	{"kl_GL"		, C_ISO_8859_1	, C_ISO_8859_1},
1258 	{"kw_GB"		, C_ISO_8859_1	, C_ISO_8859_1},
1259 	{"ms_MY"		, C_ISO_8859_1	, C_ISO_8859_1},
1260 	{"nl_BE"		, C_ISO_8859_1	, C_ISO_8859_1},
1261 	{"nl_NL"		, C_ISO_8859_1	, C_ISO_8859_1},
1262 	{"nb_NO"		, C_ISO_8859_1  , C_ISO_8859_1},
1263 	{"nn_NO"		, C_ISO_8859_1	, C_ISO_8859_1},
1264 	{"no_NO"		, C_ISO_8859_1	, C_ISO_8859_1},
1265 	{"oc_FR"		, C_ISO_8859_1	, C_ISO_8859_1},
1266 	{"pt_BR"		, C_ISO_8859_1	, C_ISO_8859_1},
1267 	{"pt_PT"		, C_ISO_8859_1	, C_ISO_8859_1},
1268 	{"sq_AL"		, C_ISO_8859_1	, C_ISO_8859_1},
1269 	{"sv_FI"		, C_ISO_8859_1	, C_ISO_8859_1},
1270 	{"sv_SE"		, C_ISO_8859_1	, C_ISO_8859_1},
1271 	{"tl_PH"		, C_ISO_8859_1	, C_ISO_8859_1},
1272 	{"uz_UZ"		, C_ISO_8859_1	, C_ISO_8859_1},
1273 	{"wa_BE"		, C_ISO_8859_1	, C_ISO_8859_1},
1274 
1275 	{"bs_BA"		, C_ISO_8859_2	, C_ISO_8859_2},
1276 	{"cs_CZ"		, C_ISO_8859_2	, C_ISO_8859_2},
1277 	{"hr_HR"		, C_ISO_8859_2	, C_ISO_8859_2},
1278 	{"hu_HU"		, C_ISO_8859_2	, C_ISO_8859_2},
1279 	{"pl_PL"		, C_ISO_8859_2	, C_ISO_8859_2},
1280 	{"ro_RO"		, C_ISO_8859_2	, C_ISO_8859_2},
1281 	{"sk_SK"		, C_ISO_8859_2	, C_ISO_8859_2},
1282 	{"sl_SI"		, C_ISO_8859_2	, C_ISO_8859_2},
1283 
1284 	{"sr_YU@cyrillic"	, C_ISO_8859_5	, C_ISO_8859_5},
1285 	{"sr_YU"		, C_ISO_8859_2	, C_ISO_8859_2},
1286 
1287 	{"mt_MT"		, C_ISO_8859_3	, C_ISO_8859_3},
1288 
1289 	{"lt_LT.iso88594"	, C_ISO_8859_4	, C_ISO_8859_4},
1290 	{"lt_LT.ISO8859-4"	, C_ISO_8859_4	, C_ISO_8859_4},
1291 	{"lt_LT.ISO_8859-4"	, C_ISO_8859_4	, C_ISO_8859_4},
1292 	{"lt_LT"		, C_ISO_8859_13	, C_ISO_8859_13},
1293 
1294 	{"mk_MK"		, C_ISO_8859_5	, C_ISO_8859_5},
1295 
1296 	{"ar_AE"		, C_ISO_8859_6	, C_ISO_8859_6},
1297 	{"ar_BH"		, C_ISO_8859_6	, C_ISO_8859_6},
1298 	{"ar_DZ"		, C_ISO_8859_6	, C_ISO_8859_6},
1299 	{"ar_EG"		, C_ISO_8859_6	, C_ISO_8859_6},
1300 	{"ar_IQ"		, C_ISO_8859_6	, C_ISO_8859_6},
1301 	{"ar_JO"		, C_ISO_8859_6	, C_ISO_8859_6},
1302 	{"ar_KW"		, C_ISO_8859_6	, C_ISO_8859_6},
1303 	{"ar_LB"		, C_ISO_8859_6	, C_ISO_8859_6},
1304 	{"ar_LY"		, C_ISO_8859_6	, C_ISO_8859_6},
1305 	{"ar_MA"		, C_ISO_8859_6	, C_ISO_8859_6},
1306 	{"ar_OM"		, C_ISO_8859_6	, C_ISO_8859_6},
1307 	{"ar_QA"		, C_ISO_8859_6	, C_ISO_8859_6},
1308 	{"ar_SA"		, C_ISO_8859_6	, C_ISO_8859_6},
1309 	{"ar_SD"		, C_ISO_8859_6	, C_ISO_8859_6},
1310 	{"ar_SY"		, C_ISO_8859_6	, C_ISO_8859_6},
1311 	{"ar_TN"		, C_ISO_8859_6	, C_ISO_8859_6},
1312 	{"ar_YE"		, C_ISO_8859_6	, C_ISO_8859_6},
1313 
1314 	{"el_GR"		, C_ISO_8859_7	, C_ISO_8859_7},
1315 	{"he_IL"		, C_ISO_8859_8	, C_ISO_8859_8},
1316 	{"iw_IL"		, C_ISO_8859_8	, C_ISO_8859_8},
1317 	{"tr_TR"		, C_ISO_8859_9	, C_ISO_8859_9},
1318 
1319 	{"lv_LV"		, C_ISO_8859_13	, C_ISO_8859_13},
1320 	{"mi_NZ"		, C_ISO_8859_13	, C_ISO_8859_13},
1321 
1322 	{"cy_GB"		, C_ISO_8859_14	, C_ISO_8859_14},
1323 
1324 	{"ar_IN"		, C_UTF_8	, C_UTF_8},
1325 	{"en_IN"		, C_UTF_8	, C_UTF_8},
1326 	{"se_NO"		, C_UTF_8	, C_UTF_8},
1327 	{"ta_IN"		, C_UTF_8	, C_UTF_8},
1328 	{"te_IN"		, C_UTF_8	, C_UTF_8},
1329 	{"ur_PK"		, C_UTF_8	, C_UTF_8},
1330 
1331 	{"th_TH"		, C_TIS_620	, C_TIS_620},
1332 	/* {"th_TH"		, C_WINDOWS_874}, */
1333 	/* {"th_TH"		, C_ISO_8859_11}, */
1334 
1335 	{"ka_GE"		, C_GEORGIAN_PS	, C_GEORGIAN_PS},
1336 	{"vi_VN.TCVN"		, C_TCVN5712_1	, C_TCVN5712_1},
1337 
1338 	{"C"			, C_US_ASCII	, C_US_ASCII},
1339 	{"POSIX"		, C_US_ASCII	, C_US_ASCII},
1340 	{"ANSI_X3.4-1968"	, C_US_ASCII	, C_US_ASCII},
1341 };
1342 
conv_get_charset_to_str_table(void)1343 static GHashTable *conv_get_charset_to_str_table(void)
1344 {
1345 	static GHashTable *table;
1346 	gint i;
1347 
1348 	if (table)
1349 		return table;
1350 
1351 	table = g_hash_table_new(NULL, g_direct_equal);
1352 
1353 	for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1354 		if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1355 		    == NULL) {
1356 			g_hash_table_insert
1357 				(table, GUINT_TO_POINTER(charsets[i].charset),
1358 				 charsets[i].name);
1359 		}
1360 	}
1361 
1362 	return table;
1363 }
1364 
conv_get_charset_from_str_table(void)1365 static GHashTable *conv_get_charset_from_str_table(void)
1366 {
1367 	static GHashTable *table;
1368 	gint i;
1369 
1370 	if (table)
1371 		return table;
1372 
1373 	table = g_hash_table_new(str_case_hash, str_case_equal);
1374 
1375 	for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1376 		g_hash_table_insert(table, charsets[i].name,
1377 				    GUINT_TO_POINTER(charsets[i].charset));
1378 	}
1379 
1380 	return table;
1381 }
1382 
conv_get_charset_str(CharSet charset)1383 const gchar *conv_get_charset_str(CharSet charset)
1384 {
1385 	GHashTable *table;
1386 
1387 	table = conv_get_charset_to_str_table();
1388 	return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1389 }
1390 
conv_get_charset_from_str(const gchar * charset)1391 CharSet conv_get_charset_from_str(const gchar *charset)
1392 {
1393 	GHashTable *table;
1394 
1395 	if (!charset) return C_AUTO;
1396 
1397 	table = conv_get_charset_from_str_table();
1398 	return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1399 }
1400 
conv_get_locale_charset(void)1401 static CharSet conv_get_locale_charset(void)
1402 {
1403 	static CharSet cur_charset = C_UNINITIALIZED;
1404 	const gchar *cur_locale;
1405 	const gchar *p;
1406 	gint i;
1407 
1408 	if (cur_charset != C_UNINITIALIZED)
1409 		return cur_charset;
1410 
1411 	cur_locale = conv_get_current_locale();
1412 	if (!cur_locale) {
1413 		cur_charset = C_US_ASCII;
1414 		return cur_charset;
1415 	}
1416 
1417 	if (strcasestr(cur_locale, "UTF-8") ||
1418 	    strcasestr(cur_locale, "utf8")) {
1419 		cur_charset = C_UTF_8;
1420 		return cur_charset;
1421 	}
1422 
1423 	if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1424 		cur_charset = C_ISO_8859_15;
1425 		return cur_charset;
1426 	}
1427 
1428 	for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1429 		const gchar *p;
1430 
1431 		/* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1432 		   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1433 		if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1434 				 strlen(locale_table[i].locale))) {
1435 			cur_charset = locale_table[i].charset;
1436 			return cur_charset;
1437 		} else if ((p = strchr(locale_table[i].locale, '_')) &&
1438 			 !strchr(p + 1, '.')) {
1439 			if (strlen(cur_locale) == 2 &&
1440 			    !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1441 				cur_charset = locale_table[i].charset;
1442 				return cur_charset;
1443 			}
1444 		}
1445 	}
1446 
1447 	cur_charset = C_AUTO;
1448 	return cur_charset;
1449 }
1450 
conv_get_locale_charset_no_utf8(void)1451 static CharSet conv_get_locale_charset_no_utf8(void)
1452 {
1453 	static CharSet cur_charset = C_UNINITIALIZED;
1454 	const gchar *cur_locale;
1455 	const gchar *p;
1456 	gint i;
1457 
1458 	if (codeconv_broken_are_utf8) {
1459 		cur_charset = C_UTF_8;
1460 		return cur_charset;
1461 	}
1462 
1463 	cur_locale = conv_get_current_locale();
1464 	if (!cur_locale) {
1465 		cur_charset = C_US_ASCII;
1466 		return cur_charset;
1467 	}
1468 
1469 	if (strcasestr(cur_locale, "UTF-8") ||
1470 	    strcasestr(cur_locale, "utf8")) {
1471 		cur_charset = C_UTF_8;
1472 		return cur_charset;
1473 	}
1474 
1475 	if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1476 		cur_charset = C_ISO_8859_15;
1477 		return cur_charset;
1478 	}
1479 
1480 	for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1481 		const gchar *p;
1482 
1483 		/* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1484 		   "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1485 		if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1486 				 strlen(locale_table[i].locale))) {
1487 			cur_charset = locale_table[i].charset;
1488 			return cur_charset;
1489 		} else if ((p = strchr(locale_table[i].locale, '_')) &&
1490 			 !strchr(p + 1, '.')) {
1491 			if (strlen(cur_locale) == 2 &&
1492 			    !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1493 				cur_charset = locale_table[i].charset;
1494 				return cur_charset;
1495 			}
1496 		}
1497 	}
1498 
1499 	cur_charset = C_AUTO;
1500 	return cur_charset;
1501 }
1502 
conv_get_locale_charset_str(void)1503 const gchar *conv_get_locale_charset_str(void)
1504 {
1505 	static const gchar *codeset = NULL;
1506 
1507 	if (!codeset)
1508 		codeset = conv_get_charset_str(conv_get_locale_charset());
1509 
1510 	return codeset ? codeset : CS_INTERNAL;
1511 }
1512 
conv_get_locale_charset_str_no_utf8(void)1513 const gchar *conv_get_locale_charset_str_no_utf8(void)
1514 {
1515 	static const gchar *codeset = NULL;
1516 
1517 	if (!codeset)
1518 		codeset = conv_get_charset_str(conv_get_locale_charset_no_utf8());
1519 
1520 	return codeset ? codeset : CS_INTERNAL;
1521 }
1522 
conv_get_outgoing_charset(void)1523 static CharSet conv_get_outgoing_charset(void)
1524 {
1525 	static CharSet out_charset = C_UNINITIALIZED;
1526 	const gchar *cur_locale;
1527 	const gchar *p;
1528 	gint i;
1529 
1530 	if (out_charset != C_UNINITIALIZED)
1531 		return out_charset;
1532 
1533 	cur_locale = conv_get_current_locale();
1534 	if (!cur_locale) {
1535 		out_charset = C_AUTO;
1536 		return out_charset;
1537 	}
1538 
1539 	if (strcasestr(cur_locale, "UTF-8") ||
1540 	    strcasestr(cur_locale, "utf8")) {
1541 		out_charset = C_UTF_8;
1542 		return out_charset;
1543 	}
1544 
1545 	if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1546 		out_charset = C_ISO_8859_15;
1547 		return out_charset;
1548 	}
1549 
1550 	for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1551 		const gchar *p;
1552 
1553 		if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1554 				 strlen(locale_table[i].locale))) {
1555 			out_charset = locale_table[i].out_charset;
1556 			break;
1557 		} else if ((p = strchr(locale_table[i].locale, '_')) &&
1558 			 !strchr(p + 1, '.')) {
1559 			if (strlen(cur_locale) == 2 &&
1560 			    !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1561 				out_charset = locale_table[i].out_charset;
1562 				break;
1563 			}
1564 		}
1565 	}
1566 
1567 	return out_charset;
1568 }
1569 
conv_get_outgoing_charset_str(void)1570 const gchar *conv_get_outgoing_charset_str(void)
1571 {
1572 	CharSet out_charset;
1573 	const gchar *str;
1574 
1575 	out_charset = conv_get_outgoing_charset();
1576 	str = conv_get_charset_str(out_charset);
1577 
1578 	return str ? str : CS_UTF_8;
1579 }
1580 
conv_get_current_locale(void)1581 const gchar *conv_get_current_locale(void)
1582 {
1583 	const gchar *cur_locale;
1584 
1585 #ifdef G_OS_WIN32
1586 	cur_locale = g_win32_getlocale();
1587 #else
1588 	cur_locale = g_getenv("LC_ALL");
1589 	if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1590 	if (!cur_locale) cur_locale = g_getenv("LANG");
1591 	if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1592 #endif /* G_OS_WIN32 */
1593 
1594 	debug_print("current locale: %s\n",
1595 		    cur_locale ? cur_locale : "(none)");
1596 
1597 	return cur_locale;
1598 }
1599 
conv_is_ja_locale(void)1600 static gboolean conv_is_ja_locale(void)
1601 {
1602 	static gint is_ja_locale = -1;
1603 	const gchar *cur_locale;
1604 
1605 	if (is_ja_locale != -1)
1606 		return is_ja_locale != 0;
1607 
1608 	is_ja_locale = 0;
1609 	cur_locale = conv_get_current_locale();
1610 	if (cur_locale) {
1611 		if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
1612 			is_ja_locale = 1;
1613 	}
1614 
1615 	return is_ja_locale != 0;
1616 }
1617 
conv_unmime_header(const gchar * str,const gchar * default_encoding,gboolean addr_field)1618 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding,
1619 			   gboolean addr_field)
1620 {
1621 	gchar buf[BUFFSIZE];
1622 
1623 	cm_return_val_if_fail(str != NULL, NULL);
1624 
1625 	if (is_ascii_str(str))
1626 		return unmime_header(str, addr_field);
1627 
1628 	if (default_encoding) {
1629 		gchar *utf8_buf;
1630 
1631 		utf8_buf = conv_codeset_strdup
1632 			(str, default_encoding, CS_INTERNAL);
1633 		if (utf8_buf) {
1634 			gchar *decoded_str;
1635 
1636 			decoded_str = unmime_header(utf8_buf, addr_field);
1637 			g_free(utf8_buf);
1638 			return decoded_str;
1639 		}
1640 	}
1641 
1642 	if (conv_is_ja_locale())
1643 		conv_anytodisp(buf, sizeof(buf), str);
1644 	else
1645 		conv_localetodisp(buf, sizeof(buf), str);
1646 
1647 	return unmime_header(buf, addr_field);
1648 }
1649 
/* Number of columns conv_encode_header_full() starts a line with before
   it considers folding. */
1650 #define MAX_LINELEN		76
/* Used by conv_encode_header_full() to force a fold inside a long
   unbroken ASCII word. */
1651 #define MAX_HARD_LINELEN	996
/* Opening and closing delimiters of a MIME-encoded block. */
1652 #define MIMESEP_BEGIN		"=?"
1653 #define MIMESEP_END		"?="
1654 
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text)
 *
 * Helper for conv_encode_header_full(); it expands in place and relies on
 * that function's locals dest, destp, srcp, len and left.
 *
 * 1. If fewer than MAX_LINELEN + 2 bytes of dest remain unused, the
 *    output is NUL-terminated and the ENCLOSING FUNCTION RETURNS.
 * 2. Otherwise, if cond holds and source text remains, a fold ("\n ")
 *    is emitted and left is reset to MAX_LINELEN - 1 when either
 *      - something has been written and left < MAX_LINELEN - 1; a space
 *        just written is dropped first, or else (for plain text) a space
 *        about to be read is skipped; or
 *      - nothing has been written yet and left < 7; for plain text a
 *        space about to be read is skipped first.
 *    The fold is suppressed if skipping the space exhausted the source.
 */
1655 #define LBREAK_IF_REQUIRED(cond, is_plain_text)				\
1656 {									\
1657 	if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {		\
1658 		*destp = '\0';						\
1659 		return;							\
1660 	}								\
1661 									\
1662 	if ((cond) && *srcp) {						\
1663 		if (destp > (guchar *)dest && left < MAX_LINELEN - 1) {	\
1664 			if (isspace(*(destp - 1)))			\
1665 				destp--;				\
1666 			else if (is_plain_text && isspace(*srcp))	\
1667 				srcp++;					\
1668 			if (*srcp) {					\
1669 				*destp++ = '\n';			\
1670 				*destp++ = ' ';				\
1671 				left = MAX_LINELEN - 1;			\
1672 			}						\
1673 		} else if (destp == (guchar *)dest && left < 7) {	\
1674 			if (is_plain_text && isspace(*srcp))		\
1675 				srcp++;					\
1676 			if (*srcp) {					\
1677 				*destp++ = '\n';			\
1678 				*destp++ = ' ';				\
1679 				left = MAX_LINELEN - 1;			\
1680 			}						\
1681 		}							\
1682 	}								\
1683 }
1684 
/* Length of the Base64 text for len input bytes: four output characters
   per started group of three input bytes. */
1685 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
1686 
/*
 * Encode a UTF-8 header value into dest, wrapping non-ASCII runs in
 * MIME-encoded blocks of the form "=?charset?B?...?=" or
 * "=?charset?Q?...?=" and folding lines with "\n ".
 *
 * dest           output buffer; receives a NUL-terminated string
 * len            size of dest in bytes
 * src            text to encode; must be valid UTF-8
 * header_len     columns already used on the first line (subtracted from
 *                MAX_LINELEN to get the room left on that line)
 * addr_field     when TRUE, '(' ')' and '"' are never placed inside an
 *                encoded block
 * out_encoding_  charset to encode with, or NULL to use
 *                conv_get_outgoing_charset_str(); CS_US_ASCII is replaced
 *                by CS_ISO_8859_1
 *
 * Base64 ("?B?") is used when MB_CUR_MAX > 1, Q-encoding ("?Q?")
 * otherwise.
 *
 * The function stops early, leaving a NUL-terminated prefix in dest, as
 * soon as fewer than MAX_LINELEN + 2 bytes of dest remain (see
 * LBREAK_IF_REQUIRED).  If a charset conversion fails while
 * codeconv_strict_mode is set, dest is made empty and the function
 * returns; otherwise the unconvertible text is passed through
 * conv_unreadable_8bit() and encoded as is.
 *
 * If src is not valid UTF-8 or dest is NULL the function leaves through
 * cm_return_if_fail() before dest has been written.
 */
conv_encode_header_full(gchar * dest,gint len,const gchar * src,gint header_len,gboolean addr_field,const gchar * out_encoding_)1687 void conv_encode_header_full(gchar *dest, gint len, const gchar *src,
1688 			gint header_len, gboolean addr_field,
1689 			const gchar *out_encoding_)
1690 {
1691 	const gchar *cur_encoding;
1692 	const gchar *out_encoding;
1693 	gint mimestr_len;
1694 	gchar *mimesep_enc;
1695 	gint left;
1696 	const guchar *srcp = src;
1697 	guchar *destp = dest;
1698 	gboolean use_base64;
1699 
1700 	cm_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1701 	cm_return_if_fail(destp != NULL);
1702 
	/* pick the transfer encoding of the encoded blocks */
1703 	if (MB_CUR_MAX > 1) {
1704 		use_base64 = TRUE;
1705 		mimesep_enc = "?B?";
1706 	} else {
1707 		use_base64 = FALSE;
1708 		mimesep_enc = "?Q?";
1709 	}
1710 
1711 	cur_encoding = CS_INTERNAL;
1712 
1713 	if (out_encoding_)
1714 		out_encoding = out_encoding_;
1715 	else
1716 		out_encoding = conv_get_outgoing_charset_str();
1717 
1718 	if (!strcmp(out_encoding, CS_US_ASCII))
1719 		out_encoding = CS_ISO_8859_1;
1720 
	/* fixed overhead of one encoded block: "=?" charset "?X?" "?=" */
1721 	mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1722 		strlen(mimesep_enc) + strlen(MIMESEP_END);
1723 
	/* columns still available on the current output line */
1724 	left = MAX_LINELEN - header_len;
1725 
1726 	while (*srcp) {
1727 		LBREAK_IF_REQUIRED(left <= 0, TRUE);
1728 
		/* copy whitespace between words unchanged */
1729 		while (isspace(*srcp)) {
1730 			*destp++ = *srcp++;
1731 			left--;
1732 			LBREAK_IF_REQUIRED(left <= 0, TRUE);
1733 		}
1734 
1735 		/* output as it is if the next word is ASCII string */
1736 		if (!is_next_nonascii(srcp)) {
1737 			gint word_len;
1738 
1739 			word_len = get_next_word_len(srcp);
1740 			LBREAK_IF_REQUIRED(left < word_len, TRUE);
1741 			while (word_len > 0) {
				/* left keeps decreasing below zero inside a long
				   word; fold once it has dropped by the gap
				   between the hard and the soft limit */
1742 				LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1743 				*destp++ = *srcp++;
1744 				left--;
1745 				word_len--;
1746 			}
1747 
1748 			continue;
1749 		}
1750 
1751 		/* don't include parentheses and quotes in encoded strings */
1752 		if (addr_field && (*srcp == '(' || *srcp == ')' || *srcp == '"')) {
1753 			LBREAK_IF_REQUIRED(left < 2, FALSE);
1754 			*destp++ = *srcp++;
1755 			left--;
1756 		}
1757 
		/* emit encoded blocks until the non-ASCII run is consumed;
		   each pass produces at most one block */
1758 		while (1) {
1759 			gint mb_len = 0;
1760 			gint cur_len = 0;
1761 			gchar *part_str;
1762 			gchar *out_str;
1763 			gchar *enc_str;
1764 			const guchar *p = srcp;
1765 			gint out_str_len;
1766 			gint out_enc_str_len;
1767 			gint mime_block_len;
1768 			gboolean cont = FALSE;
1769 
			/* grow the candidate chunk one UTF-8 character at a
			   time for as long as its encoded form still fits
			   into the room left on the line */
1770 			while (*p != '\0') {
1771 				if (isspace(*p) && !is_next_nonascii(p + 1))
1772 					break;
1773 				/* don't include parentheses in encoded
1774 				   strings */
1775 				if (addr_field && (*p == '(' || *p == ')' || *p == '"'))
1776 					break;
1777 
1778 				mb_len = g_utf8_skip[*p];
1779 
				/* trial conversion of the chunk extended by the
				   next character, only to measure its length */
1780 				Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1781 				out_str = conv_codeset_strdup
1782 					(part_str, cur_encoding, out_encoding);
1783 				if (!out_str) {
1784 					if (codeconv_strict_mode) {
1785 						*dest = '\0';
1786 						return;
1787 					} else {
1788 						g_warning("conv_encode_header_full(): code conversion failed");
1789 						conv_unreadable_8bit(part_str);
1790 						out_str = g_strdup(part_str);
1791 					}
1792 				}
1793 				out_str_len = strlen(out_str);
1794 
1795 				if (use_base64)
1796 					out_enc_str_len = B64LEN(out_str_len);
1797 				else
1798 					out_enc_str_len =
1799 						qp_get_q_encoding_len(out_str);
1800 
1801 				g_free(out_str);
1802 
1803 				if (mimestr_len + out_enc_str_len <= left) {
1804 					cur_len += mb_len;
1805 					p += mb_len;
1806 				} else if (cur_len == 0) {
					/* not even one character fits: request a
					   fold and retry the same character */
1807 					left = 0;
1808 					LBREAK_IF_REQUIRED(1, FALSE);
1809 					continue;
1810 				} else {
					/* chunk is full; more follows after a fold */
1811 					cont = TRUE;
1812 					break;
1813 				}
1814 			}
1815 
1816 			if (cur_len > 0) {
				/* convert and encode the accepted chunk for real */
1817 				Xstrndup_a(part_str, srcp, cur_len, );
1818 				out_str = conv_codeset_strdup
1819 					(part_str, cur_encoding, out_encoding);
1820 				if (!out_str) {
1821 					g_warning("conv_encode_header_full(): code conversion failed");
1822 					conv_unreadable_8bit(part_str);
1823 					out_str = g_strdup(part_str);
1824 				}
1825 				out_str_len = strlen(out_str);
1826 
1827 				if (use_base64)
1828 					out_enc_str_len = B64LEN(out_str_len);
1829 				else
1830 					out_enc_str_len =
1831 						qp_get_q_encoding_len(out_str);
1832 
				/* Base64 text is heap-allocated by GLib and freed
				   below; Q-encoded text is placed via Xalloca() */
1833 				if (use_base64)
1834 					enc_str = g_base64_encode(out_str, out_str_len);
1835 				else {
1836 					Xalloca(enc_str, out_enc_str_len + 1, );
1837 					qp_q_encode(enc_str, out_str);
1838 				}
1839 
1840 				g_free(out_str);
1841 
1842 				/* output MIME-encoded string block */
1843 				mime_block_len = mimestr_len + strlen(enc_str);
1844 				g_snprintf(destp, mime_block_len + 1,
1845 					   MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1846 					   out_encoding, mimesep_enc, enc_str);
1847 
1848 				if (use_base64)
1849 					g_free(enc_str);
1850 
1851 				destp += mime_block_len;
1852 				srcp += cur_len;
1853 
1854 				left -= mime_block_len;
1855 			}
1856 
1857 			LBREAK_IF_REQUIRED(cont, FALSE);
1858 
			/* nothing was consumed: the run has ended */
1859 			if (cur_len == 0)
1860 				break;
1861 		}
1862 	}
1863 
1864 	*destp = '\0';
1865 }
1866 
/*
 * Encode a header value with the default outgoing charset.
 *
 * Same as conv_encode_header_full() with a NULL out_encoding_ argument;
 * see that function for the meaning of the parameters.
 */
void conv_encode_header(gchar *dest, gint len, const gchar *src,
			gint header_len, gboolean addr_field)
{
	conv_encode_header_full(dest, len, src, header_len, addr_field,
				NULL);
}
1872 
1873 #undef LBREAK_IF_REQUIRED
1874 #undef B64LEN
1875 
conv_filename_from_utf8(const gchar * utf8_file)1876 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1877 {
1878 	gchar *fs_file;
1879 	GError *error = NULL;
1880 
1881 	cm_return_val_if_fail(utf8_file != NULL, NULL);
1882 
1883 	fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1884 	if (error) {
1885 		debug_print("failed to convert encoding of file name: %s\n",
1886 			  error->message);
1887 		g_error_free(error);
1888 	}
1889 	if (!fs_file)
1890 		fs_file = g_strdup(utf8_file);
1891 
1892 	return fs_file;
1893 }
1894 
conv_filename_to_utf8(const gchar * fs_file)1895 gchar *conv_filename_to_utf8(const gchar *fs_file)
1896 {
1897 	gchar *utf8_file = NULL;
1898 	GError *error = NULL;
1899 
1900 	cm_return_val_if_fail(fs_file != NULL, NULL);
1901 
1902 	utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1903 	if (error) {
1904 		g_warning("failed to convert encoding of file name: %s",
1905 			  error->message);
1906 		g_error_free(error);
1907 	}
1908 
1909 	if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1910 		g_free(utf8_file);
1911 		utf8_file = g_strdup(fs_file);
1912 		conv_unreadable_8bit(utf8_file);
1913 	}
1914 
1915 	return utf8_file;
1916 }
1917