1 /* -*- mode: C; mode: fold -*- */
2 /* Charset handling routines.
3 *
4 * Author: Felix Schueller
5 * Modified by JED.
6 *
7 */
8
9 #include "config.h"
10 #include "slrnfeat.h"
11
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <string.h>
15 #include <errno.h>
16
17 #if defined(HAVE_LOCALE_H) && defined(HAVE_LANGINFO_H)
18 # include <locale.h>
19 # include <langinfo.h>
20 #endif
21
22 #ifdef HAVE_ICONV
23 # include <iconv.h>
24 #endif
25
26 #include <slang.h>
27
28 #include "jdmacros.h"
29 #include "slrn.h"
30 #include "group.h"
31 #include "art.h"
32 #include "util.h"
33 #include "snprintf.h"
34 #include "mime.h"
35 #include "strutil.h"
36 #include "charset.h"
37 #include "common.h"
38
/* Charset name settings.  Every one of them starts out as NULL. */

/* Not referenced by any routine in this file. */
char *Slrn_Config_Charset = NULL;

/* Filled in by slrn_init_charset() / slrn_prepare_charset(). */
char *Slrn_Display_Charset = NULL;

/* Reset to NULL by slrn_prepare_charset() when it names the display charset. */
char *Slrn_Editor_Charset = NULL;

/* Defaults to the display charset in slrn_prepare_charset(). */
char *Slrn_Outgoing_Charset = NULL;

/* Consulted by slrn_guess_charset() for 8-bit text that is not UTF-8. */
char *Slrn_Fallback_Input_Charset = NULL;
44
/* Initialise Slrn_Display_Charset from the codeset of the current locale.
 *
 * The function does nothing unless <locale.h>, <langinfo.h> and CODESET are
 * all available at compile time, and it never replaces a display charset
 * that has already been set.  setlocale() is expected to have been called
 * before this function runs.
 */
void slrn_init_charset (void)
{
#if defined(HAVE_LOCALE_H) && defined(HAVE_LANGINFO_H) && defined(CODESET)
  char *charset;

  if (Slrn_Display_Charset != NULL)
    return;

  charset = nl_langinfo (CODESET);

  /* Duplicate the pointer that was just validated instead of asking
   * nl_langinfo() a second time for a value that is then used unchecked.
   */
  if ((charset != NULL) && (*charset != 0))
    Slrn_Display_Charset = slrn_safe_strmalloc (charset);
#endif
}
58
slrn_prepare_charset(void)59 void slrn_prepare_charset (void)
60 {
61 if (Slrn_Display_Charset == NULL)
62 {
63 char *charset = "US-ASCII";
64 if (Slrn_UTF8_Mode)
65 charset = "UTF-8";
66 Slrn_Display_Charset = slrn_safe_strmalloc (charset);
67 }
68 if (Slrn_Outgoing_Charset == NULL)
69 {
70 Slrn_Outgoing_Charset = Slrn_Display_Charset;
71 }
72 if ((Slrn_Editor_Charset != NULL) && (0 == slrn_case_strcmp(Slrn_Display_Charset, Slrn_Editor_Charset)))
73 {
74 slrn_free(Slrn_Editor_Charset);
75 Slrn_Editor_Charset=NULL;
76 }
77 }
78
/* Return 1 if the NUL-terminated string str holds at least one byte with
 * the high bit set (i.e. a byte outside US-ASCII), and 0 otherwise.
 */
int slrn_string_nonascii(char *str)
{
  const unsigned char *p;

  for (p = (const unsigned char *) str; *p != 0; p++)
    {
      if (*p >= 0x80)
        return 1;
    }
  return 0;
}
90
91 #ifdef HAVE_ICONV
/* Run the len bytes starting at str through the conversion descriptor cd
 * and hand back a freshly allocated, NUL-terminated result in *outstrp.
 *
 * Return value:
 *    1  the conversion succeeded and *outstrp holds the result;
 *    0  test is non-zero and either a byte sequence could not be converted
 *       or memory could not be allocated; also returned for an empty input
 *       (len == 0), in which case nothing is converted;
 *   -1  test is zero and memory could not be allocated.
 *
 * When test is zero, bytes that cannot be converted are replaced by '?'.
 * *outstrp is set to NULL whenever the return value is not 1.
 */
static int iconv_convert_string (iconv_t cd, char *str, size_t len, int test, char **outstrp)
{
  char *buf = NULL;
  char *result;
  unsigned int buflen = 0;
  size_t used = 0;               /* number of bytes written to buf so far */
  size_t inbytesleft = len;
  size_t outbytesleft = 0;
  int fail_error = (test ? 0 : -1);
  int need_realloc = 1;

  /* Give the caller a defined value on every failure path, including the
   * early return for an empty input.
   */
  *outstrp = NULL;

  if (len == 0)
    return 0;

  while (inbytesleft)
    {
      char *bufp;
      size_t ret;

      if (need_realloc)
        {
          char *tmpbuf;
          unsigned int dsize = 2*len;

          buflen += dsize;
          outbytesleft += dsize;
          /* NOTE(review): the last slrn_realloc argument is presumably an
           * error-reporting flag, so test mode stays silent -- confirm.
           */
          if (NULL == (tmpbuf = slrn_realloc (buf, buflen+1, test==0)))
            {
              slrn_free (buf);
              return fail_error;
            }
          buf = tmpbuf;
          need_realloc = 0;
        }

      /* The write position is kept as an offset so that it never has to be
       * derived from a pointer into a block that was just reallocated.
       */
      bufp = buf + used;
      errno = 0;
      ret = iconv (cd, &str, &inbytesleft, &bufp, &outbytesleft);
      used = (size_t) (bufp - buf);
#ifdef NON_GNU_ICONV
      if (ret == 0)
        break;
#else
      if (ret != (size_t) -1)
        break;
#endif
      switch (errno)
        {
        default:
        case EINVAL:
        case EILSEQ:               /* invalid byte sequence */
          if (test)
            {
              slrn_free (buf);
              return 0;
            }
          /* No input left to skip: leave the counters alone (decrementing
           * would wrap them around) and let the loop condition end things.
           */
          if (inbytesleft == 0)
            break;
          /* The output buffer is exactly full.  Grow it first and retry;
           * decrementing outbytesleft here would wrap it around and let
           * later iconv calls write past the end of the allocation.
           */
          if (outbytesleft == 0)
            {
              need_realloc = 1;
              break;
            }
          buf[used++] = '?';
          str++;
          inbytesleft--;
          outbytesleft--;
          /* FIXME: Should the shift-state be reset? */
          break;
#ifndef NON_GNU_ICONV
        case 0:                    /* windows bug */
#endif
        case E2BIG:
          need_realloc = 1;
          break;
        }
    }

  /* Trim the buffer down to what was actually produced plus the NUL. */
  result = slrn_realloc (buf, used+1, 1);
  if (result == NULL)
    {
      slrn_free (buf);
      return fail_error;
    }
  result[used] = 0;
  *outstrp = result;

  return 1;
}
187 #endif
188
189 /* Guess a character set from the bytes in the string -- it returns a
190 * malloced string.
191 */
slrn_guess_charset(char * str,char * strmax)192 char *slrn_guess_charset (char *str, char *strmax)
193 {
194 char *charset = "us-ascii";
195
196 while (str < strmax)
197 {
198 unsigned int nconsumed;
199 SLwchar_Type wch;
200
201 if ((*str & 0x80) == 0)
202 {
203 str++;
204 continue;
205 }
206
207 /* First see if it looks like UTF-8 */
208 if (NULL != SLutf8_decode ((SLuchar_Type *)str, (SLuchar_Type *)strmax, &wch, &nconsumed))
209 {
210 charset = "UTF-8";
211 break;
212 }
213
214 charset = Slrn_Fallback_Input_Charset;
215 if (charset == NULL)
216 charset = "iso-8859-1";
217
218 break;
219 }
220 return slrn_strmalloc (charset, 1);
221 }
222
/* Convert the bytes in [str, strmax) from charset `from' to charset `to'
 * and return the result as a newly allocated string, or NULL on failure.
 *
 * With iconv: a NULL, "unknown-8bit" or "x-user-defined" source charset is
 * replaced by the guess made by slrn_guess_charset().  If the conversion
 * descriptor cannot be opened, NULL is returned, and an error is reported
 * unless test is non-zero.  test is also passed on to
 * iconv_convert_string().
 *
 * Without iconv: if both charset names are equal (compared without regard
 * to case, like everywhere else in this file) a plain copy is returned.
 * Otherwise NULL is returned in test mode, and in non-test mode a copy is
 * returned in which every byte with the high bit set is replaced by '?'.
 */
char *slrn_convert_string (char *from, char *str, char *strmax, char *to, int test)
{
#ifdef HAVE_ICONV
  iconv_t cd;
  int status;
  char *substr;
  char *guessed = NULL;            /* non-NULL only if we own `from' */

  if ((from == NULL)
      || (0 == slrn_case_strcmp (from, "unknown-8bit"))
      || (0 == slrn_case_strcmp (from, "x-user-defined")))
    {
      guessed = slrn_guess_charset (str, strmax);
      if (guessed == NULL)
        return NULL;
      from = guessed;
    }

  cd = iconv_open (to, from);
  if (cd == (iconv_t)(-1))
    {
      if (test == 0)
        slrn_error (_("Can't convert %s -> %s\n"), from, to);

      if (guessed != NULL)
        slrn_free (guessed);

      return NULL;
    }

  status = iconv_convert_string (cd, str, strmax-str, test, &substr);
  iconv_close (cd);

  if (guessed != NULL)
    slrn_free (guessed);

  /* substr is only meaningful when the helper reports success (1); both
   * 0 and -1 mean that there is nothing to return.
   */
  if (status != 1)
    return NULL;

  return substr;
#else /* no iconv */

  char *copy;
  char *p;

  /* Charset names are compared case-insensitively so that e.g. "utf-8" and
   * "UTF-8" are recognised as the same charset and the text is left alone.
   */
  if ((from != NULL) && (0 == slrn_case_strcmp (to, from)))
    return slrn_strnmalloc (str, strmax-str, 1);

  if (test)
    return NULL;

  /* Force it to us-ascii */
  copy = slrn_strnmalloc (str, strmax-str, 1);
  if (copy == NULL)
    return NULL;

  for (p = copy; *p; p++)
    {
      if (*p & 0x80)
        *p = '?';
    }
  return copy;
#endif
}
293
/* Build a new string from str in which the len bytes starting at offset
 * have been replaced by their conversion from from_charset to to_charset.
 *
 * str itself is neither modified nor freed.  test is handed through to
 * slrn_convert_string().
 *
 * Returns the newly allocated string, or NULL if len is 0, if the requested
 * range does not lie inside str (an internal error is reported in that
 * case), if the conversion fails, or if memory cannot be allocated.
 */
char *slrn_convert_substring(char *str, unsigned int offset, unsigned int len, char *to_charset, char *from_charset, int test)
{
  char *substr;
  char *new_str;
  unsigned int str_len;
  unsigned int tail_len;
  unsigned int dlen;

  if (len == 0)
    return NULL;

  str_len = strlen (str);

  /* Written so that it cannot wrap around: `offset + len' may overflow an
   * unsigned int and would then slip past a naive range check.
   */
  if ((offset > str_len) || (len > str_len - offset))
    {
      slrn_error ("Internal Error in slrn_convert_substring");
      return NULL; /* internal error */
    }

  substr = slrn_convert_string (from_charset, str+offset, str+offset+len,
                                to_charset, test);
  if (substr == NULL)
    return NULL;

  dlen = strlen (substr);
  tail_len = str_len - offset - len;

  new_str = slrn_malloc (offset + dlen + tail_len + 1, 0, 1);
  if (new_str == NULL)
    {
      slrn_free (substr);
      return NULL;
    }

  /* head | converted text | tail (the tail copy includes the final NUL) */
  memcpy (new_str, str, offset);
  memcpy (new_str + offset, substr, dlen);
  memcpy (new_str + offset + dlen, str + offset + len, tail_len + 1);

  slrn_free (substr);
  return new_str;
}
331
/* Convert str from from_charset to to_charset if that is necessary.
 *
 * *dest receives the converted copy, or NULL when nothing was converted
 * because one of the charsets is NULL or str holds no 8-bit bytes.
 *
 * Returns 0 on success (converted or nothing to do) and -1 if dest is NULL
 * or slrn_convert_substring() failed.
 */
int slrn_test_and_convert_string(char *str, char **dest, char *to_charset, char *from_charset)
{
  char *converted;

  if (dest == NULL)
    return -1;

  *dest = NULL;

  if ((to_charset == NULL)
      || (from_charset == NULL)
      || (0 == slrn_string_nonascii(str)))
    return 0;

  converted = slrn_convert_substring(str, 0, strlen (str), to_charset, from_charset, 0);
  *dest = converted;

  return (converted == NULL) ? -1 : 0;
}
350
/* fprintf-like function that converts the formatted text from from_charset
 * to to_charset before writing it to fp.
 *
 * If either charset is NULL or both names compare equal, the arguments are
 * passed straight to vfprintf() and its result is returned.  Otherwise the
 * text is formatted into a temporary string; if that string holds no 8-bit
 * bytes it is written as is, else it is converted first.  In both of these
 * cases the return value is that of fputs().
 *
 * Returns -1 if the text could not be formatted or converted.
 */
int slrn_convert_fprintf(FILE *fp, char *to_charset, char *from_charset, const char *format, ... )
{
  va_list args;
  int retval;
  char *str;
  char *converted;

  va_start (args, format);

  if ((to_charset == NULL) || (from_charset == NULL) || (slrn_case_strcmp(to_charset, from_charset) == 0))
    {
      retval = vfprintf (fp, format, args);
      va_end (args);
      return retval;
    }

  str = slrn_strdup_vprintf(format, args);
  va_end (args);

  /* NOTE(review): slrn_strdup_vprintf() presumably returns NULL when it
   * cannot allocate the string; guard against that instead of handing a
   * NULL pointer to the string routines below.
   */
  if (str == NULL)
    return -1;

  if (!slrn_string_nonascii(str))
    {
      retval = fputs (str, fp);
      slrn_free(str);
      return retval;
    }

  converted = slrn_convert_substring(str, 0, strlen (str), to_charset, from_charset, 0);
  slrn_free(str);
  if (converted == NULL)
    return -1;

  retval = fputs (converted, fp);
  slrn_free(converted);

  return retval;
}
387
388 #ifdef HAVE_ICONV
/* Push a single "\n" through the conversion descriptor cd (in test mode)
 * and throw the converted output away.
 *
 * NOTE(review): presumably this keeps the converter state in step with the
 * line breaks that are not part of the per-line buffers -- confirm.
 */
static void iconv_convert_newline (iconv_t cd)
{
  char *newline = "\n";
  char *converted;
  int status;

  status = iconv_convert_string (cd, newline, 1, 1, &converted);
  if (status != 1)
    return;

  slrn_free (converted);
}
397 #endif
398
399 /* converts a->lines */
slrn_convert_article(Slrn_Article_Type * a,char * to_charset,char * from_charset)400 int slrn_convert_article(Slrn_Article_Type *a, char *to_charset, char *from_charset)
401 {
402 #ifdef HAVE_ICONV
403 iconv_t cd;
404 char *tmp;
405 struct Slrn_Article_Line_Type *line=a->lines;
406
407 if ((cd = iconv_open(to_charset, from_charset)) == (iconv_t)(-1))
408 {
409 slrn_error (_("Can't convert %s -> %s\n"), from_charset, to_charset);
410 return -1;
411 }
412
413 /* Headers are handled elsewhere */
414 while ((line != NULL) && (line->flags & HEADER_LINE))
415 {
416 line=line->next;
417 }
418
419 while (line != NULL)
420 {
421 if (1 == iconv_convert_string(cd, line->buf, strlen (line->buf), 0, &tmp))
422 {
423 slrn_free((char *) line->buf);
424 line->buf=tmp;
425 a->mime.was_modified=1;
426 iconv_convert_newline (cd);
427 }
428 line=line->next;
429 }
430 iconv_close(cd);
431 #else
432 (void) a;
433 (void) to_charset;
434 (void) from_charset;
435 #endif
436 return 0;
437 }
438
439 /* It returns 0 if it did not convert, 1 if it did, -1 upon error.
440 * Only those lines that have the 8bit flag set will be converted.
441 */
slrn_test_convert_lines(Slrn_Article_Line_Type * rlines,char * to_charset,char * from_charset,char ** badlinep)442 int slrn_test_convert_lines (Slrn_Article_Line_Type *rlines, char *to_charset, char *from_charset, char **badlinep)
443 {
444 #ifdef HAVE_ICONV
445 Slrn_Article_Line_Type *rline;
446 Slrn_Article_Line_Type *elines, *eline;
447 iconv_t cd;
448 int status;
449
450 if ((cd = iconv_open(to_charset, from_charset)) == (iconv_t)(-1))
451 return 0;
452
453 elines = eline = NULL;
454 rline = rlines;
455
456 status = 0;
457 while (rline != NULL)
458 {
459 Slrn_Article_Line_Type *next;
460
461 if (0 == (rline->flags & LINE_HAS_8BIT_FLAG))
462 {
463 rline = rline->next;
464 continue;
465 }
466
467 next = (Slrn_Article_Line_Type *) slrn_malloc (sizeof(Slrn_Article_Line_Type), 1, 1);
468 if (next == NULL)
469 {
470 status = -1;
471 *badlinep = rline->buf;
472 goto free_return;
473 }
474
475 switch (iconv_convert_string (cd, rline->buf, strlen (rline->buf), 1, &next->buf))
476 {
477 case 1: /* line converted ok */
478 if (eline == NULL)
479 elines = next;
480 else
481 eline->next = next;
482 eline = next;
483 break;
484
485 case 0: /* failed to convert */
486 if (Slrn_Debug_Fp != NULL)
487 {
488 (void) fprintf (Slrn_Debug_Fp, "*** iconv_convert_string failed to convert:\n");
489 (void) fprintf (Slrn_Debug_Fp, "%s\n", rline->buf);
490 (void) fprintf (Slrn_Debug_Fp, "*** from charset=%s to charset=%s\n", from_charset, to_charset);
491 (void) fflush (Slrn_Debug_Fp);
492 }
493 status = 0;
494 *badlinep = rline->buf;
495 slrn_art_free_line (next);
496 goto free_return;
497
498 default:
499 status = -1;
500 *badlinep = rline->buf;
501 slrn_art_free_line (next);
502 goto free_return;
503 }
504 rline=rline->next;
505 }
506
507 /* Converted ok if we get here */
508 eline = elines;
509 rline = rlines;
510 while (rline != NULL)
511 {
512 if (0 == (rline->flags & LINE_HAS_8BIT_FLAG))
513 {
514 rline = rline->next;
515 continue;
516 }
517 slrn_free (rline->buf);
518 rline->buf = eline->buf;
519 eline->buf = NULL;
520
521 rline->flags &= ~LINE_HAS_8BIT_FLAG;
522
523 rline = rline->next;
524 eline = eline->next;
525 }
526 status = 1;
527 /* drop */
528
529 free_return:
530 iconv_close (cd);
531 while (elines != NULL)
532 {
533 eline = elines;
534 elines = elines->next;
535 slrn_art_free_line (eline);
536 }
537 return status;
538
539 #else
540 (void) rlines;
541 (void) to_charset;
542 (void) from_charset;
543 (void) badlinep;
544 return 1;
545 #endif
546 }
547
548