1 /* -*- mode: C; mode: fold -*- */
2 /* Charset handling routines.
3  *
4  * Author: Felix Schueller
5  * Modified by JED.
6  *
7  */
8 
9 #include "config.h"
10 #include "slrnfeat.h"
11 
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <string.h>
15 #include <errno.h>
16 
17 #if defined(HAVE_LOCALE_H) && defined(HAVE_LANGINFO_H)
18 # include <locale.h>
19 # include <langinfo.h>
20 #endif
21 
22 #ifdef HAVE_ICONV
23 # include <iconv.h>
24 #endif
25 
26 #include <slang.h>
27 
28 #include "jdmacros.h"
29 #include "slrn.h"
30 #include "group.h"
31 #include "art.h"
32 #include "util.h"
33 #include "snprintf.h"
34 #include "mime.h"
35 #include "strutil.h"
36 #include "charset.h"
37 #include "common.h"
38 
/* Character set names shared with the rest of the program.  Each one is
 * either NULL (not set) or points to a NUL-terminated charset name.
 */

/* Not referenced by the routines in this file. */
char *Slrn_Config_Charset = NULL;

/* Filled in by slrn_init_charset() / slrn_prepare_charset() when unset. */
char *Slrn_Display_Charset = NULL;

/* Reset to NULL by slrn_prepare_charset() when it names the display charset. */
char *Slrn_Editor_Charset = NULL;

/* Defaults to the display charset pointer in slrn_prepare_charset(). */
char *Slrn_Outgoing_Charset = NULL;

/* Consulted by slrn_guess_charset() for 8-bit text that is not valid UTF-8. */
char *Slrn_Fallback_Input_Charset = NULL;
44 
/* Initialise Slrn_Display_Charset from the codeset of the current locale.
 *
 * Nothing happens when a display charset has already been set, when the
 * locale reports no (or an empty) codeset, or when the build does not
 * provide <locale.h>, <langinfo.h> and CODESET.
 *
 * setlocale() is expected to have been called before this function.
 */
void slrn_init_charset (void)
{
#if defined(HAVE_LOCALE_H) && defined(HAVE_LANGINFO_H) && defined(CODESET)
   char *codeset;

   if (Slrn_Display_Charset != NULL)
     return;

   /* setlocale has already been called when this function is called */
   codeset = nl_langinfo (CODESET);

   /* Copy the pointer that was just validated instead of querying again. */
   if ((codeset != NULL) && (*codeset != 0))
     Slrn_Display_Charset = slrn_safe_strmalloc (codeset);
#endif
}
58 
slrn_prepare_charset(void)59 void slrn_prepare_charset (void)
60 {
61   if (Slrn_Display_Charset == NULL)
62     {
63        char *charset = "US-ASCII";
64        if (Slrn_UTF8_Mode)
65 	 charset = "UTF-8";
66        Slrn_Display_Charset = slrn_safe_strmalloc (charset);
67     }
68   if (Slrn_Outgoing_Charset == NULL)
69     {
70        Slrn_Outgoing_Charset = Slrn_Display_Charset;
71     }
72   if ((Slrn_Editor_Charset != NULL) && (0 == slrn_case_strcmp(Slrn_Display_Charset, Slrn_Editor_Charset)))
73     {
74        slrn_free(Slrn_Editor_Charset);
75        Slrn_Editor_Charset=NULL;
76     }
77 }
78 
/* Report whether the NUL-terminated string str contains any byte with the
 * high bit (0x80) set, i.e. anything outside 7-bit US-ASCII.
 *
 * Returns 1 if such a byte is found, 0 otherwise (including for "").
 */
int slrn_string_nonascii(char *str)
{
   const char *p;

   for (p = str; *p != '\0'; p++)
     {
        if ((*p & 0x80) != 0)
          return 1;
     }
   return 0;
}
90 
91 #ifdef HAVE_ICONV
/* Convert the len bytes at str through the conversion descriptor cd.
 *
 * On success the result is stored in *outstrp as a freshly allocated,
 * NUL-terminated buffer that the caller must release with slrn_free().
 * In every other case *outstrp is set to NULL.
 *
 * If test is non-zero, the first byte sequence that cannot be converted
 * aborts the conversion.  If test is zero, each offending input byte is
 * replaced by a '?' and the conversion carries on.
 *
 * Return value:
 *    1  the string was converted
 *    0  len was 0 (nothing to do), or test is non-zero and either the
 *       conversion hit an unconvertible sequence or memory ran out
 *   -1  test is zero and memory ran out
 */
static int iconv_convert_string (iconv_t cd, char *str, size_t len, int test, char **outstrp)
{
   char *buf, *bufp;
   unsigned int buflen;
   size_t inbytesleft;
   size_t outbytesleft;
   int fail_error;
   int need_realloc;

   /* Set this before any return so callers never see an uninitialised
    * pointer, not even on the len == 0 path.
    */
   *outstrp = NULL;

   if (len == 0)
     return 0;

   if (test)
     fail_error = 0;
   else
     fail_error = -1;

   inbytesleft = len;
   bufp = buf = NULL;
   buflen = 0;
   outbytesleft = 0;
   need_realloc = 1;

   while (inbytesleft)
     {
        size_t ret;

        if (need_realloc)
          {
             char *tmpbuf;
             unsigned int dsize = 2*len;
             /* Remember the write position as an offset: the old buffer
              * address must not be used once realloc has moved the block.
              */
             size_t used = (buf == NULL) ? 0 : (size_t) (bufp - buf);

             buflen += dsize;
             outbytesleft += dsize;
             /* One spare byte is kept for the terminating NUL. */
             if (NULL == (tmpbuf = slrn_realloc (buf, buflen+1, test==0)))
               {
                  slrn_free (buf);
                  return fail_error;
               }
             buf = tmpbuf;
             bufp = buf + used;
             need_realloc = 0;
          }

        errno = 0;
        ret = iconv (cd, &str, &inbytesleft, &bufp, &outbytesleft);
#ifdef NON_GNU_ICONV
        if (ret == 0)
          break;
#else
        if (ret != (size_t) -1)
          break;
#endif
        switch (errno)
          {
           default:
           case EINVAL:
           case EILSEQ:        /* invalid byte sequence */
             if (test)
               {
                  slrn_free (buf);
                  return 0;
               }
             if (outbytesleft == 0)
               {
                  /* The output buffer is exactly full.  Writing the '?'
                   * now would make outbytesleft wrap around and let the
                   * next iconv call run past the end of the buffer.  Grow
                   * the buffer and retry; iconv will report the same
                   * sequence again, this time with room available.
                   */
                  need_realloc = 1;
                  break;
               }
             *bufp++ = '?';
             str++;
             inbytesleft--;
             outbytesleft--;
             /* FIXME: Should the shift-state be reset? */
             break;
#ifndef NON_GNU_ICONV
           case 0:                    /* windows bug */
#endif
           case E2BIG:
             need_realloc = 1;
             break;
          }
     }

   /* Shrink the buffer to the bytes actually produced plus the NUL. */
   len = (unsigned int) (bufp - buf);
   bufp = slrn_realloc (buf, len+1, 1);
   if (bufp == NULL)
     {
        slrn_free (buf);
        return fail_error;
     }
   bufp[len] = 0;
   *outstrp = bufp;

   return 1;
}
187 #endif
188 
189 /* Guess a character set from the bytes in the string -- it returns a
190  * malloced string.
191  */
slrn_guess_charset(char * str,char * strmax)192 char *slrn_guess_charset (char *str, char *strmax)
193 {
194    char *charset = "us-ascii";
195 
196    while (str < strmax)
197      {
198 	unsigned int nconsumed;
199 	SLwchar_Type wch;
200 
201 	if ((*str & 0x80) == 0)
202 	  {
203 	     str++;
204 	     continue;
205 	  }
206 
207 	/* First see if it looks like UTF-8 */
208 	if (NULL != SLutf8_decode ((SLuchar_Type *)str, (SLuchar_Type *)strmax, &wch, &nconsumed))
209 	  {
210 	     charset = "UTF-8";
211 	     break;
212 	  }
213 
214 	charset = Slrn_Fallback_Input_Charset;
215 	if (charset == NULL)
216 	  charset = "iso-8859-1";
217 
218 	break;
219      }
220    return slrn_strmalloc (charset, 1);
221 }
222 
/* Convert the bytes in [str, strmax) from charset `from' to charset `to'.
 *
 * With iconv:
 *   If from is NULL, "unknown-8bit" or "x-user-defined", the source
 *   charset is guessed with slrn_guess_charset.  When the converter cannot
 *   be opened, an error is reported unless test is non-zero.  The test
 *   flag is passed on to the conversion: non-zero means fail on
 *   unconvertible input rather than substituting '?'.
 *
 * Without iconv:
 *   If both names denote the same charset (compared case-insensitively,
 *   as elsewhere in this file) the bytes are copied unchanged.  Otherwise
 *   NULL is returned when test is non-zero, else a copy is returned in
 *   which every byte with the high bit set has been replaced by '?'.
 *
 * Returns a newly allocated NUL-terminated string that the caller frees,
 * or NULL if nothing was converted.
 */
char *slrn_convert_string (char *from, char *str, char *strmax, char *to, int test)
{
#ifdef HAVE_ICONV
   iconv_t cd;
   int status;
   char *substr = NULL;
   int free_from = 0;

   if ((from == NULL)
       || (0 == slrn_case_strcmp (from, "unknown-8bit"))
       || (0 == slrn_case_strcmp (from, "x-user-defined")))
     {
        from = slrn_guess_charset (str, strmax);
        if (from == NULL)
          return NULL;
        free_from = 1;
     }

   if ((cd = iconv_open(to, from)) == (iconv_t)(-1))
     {
        if (test == 0)
          slrn_error (_("Can't convert %s -> %s\n"), from, to);

        if (free_from)
          slrn_free (from);

        return NULL;
     }

   status = iconv_convert_string (cd, str, strmax-str, test, &substr);
   iconv_close(cd);

   if (free_from)
     slrn_free (from);

   /* Only a status of 1 hands back a buffer; 0 and -1 both mean "no result". */
   if (status != 1)
     return NULL;

   return substr;
#else /* no iconv */

   char *s;

   /* Charset names are case-insensitive, so "utf-8" and "UTF-8" must be
    * treated as identical here, just as in the rest of this file.
    */
   if ((from != NULL) && (0 == slrn_case_strcmp (to, from)))
     return slrn_strnmalloc (str, strmax-str, 1);

   if (test)
     return NULL;

   /* Force it to us-ascii */
   s = slrn_strnmalloc (str, strmax-str, 1);
   if (s == NULL)
     return NULL;

   str = s;
   while (*s)
     {
        if (*s & 0x80)
          *s = '?';
        s++;
     }
   return str;
#endif
}
293 
/* Return a copy of str in which the len bytes starting at offset have been
 * converted from from_charset to to_charset; the bytes before and after
 * that range are copied unchanged.
 *
 * test is handed to slrn_convert_string unchanged.
 *
 * Returns a newly allocated string that the caller frees, or NULL when
 * len is 0, when the range does not lie within str (reported as an
 * internal error), when the conversion yields nothing, or when memory
 * runs out.
 */
char *slrn_convert_substring(char *str, unsigned int offset, unsigned int len, char *to_charset, char *from_charset, int test)
{
   char *substr;
   char *new_str;
   unsigned int old_len;
   unsigned int tail_len;
   unsigned int dlen;

   old_len = strlen (str);
   if (len == 0)
     return NULL;

   /* Written so that the check cannot wrap around: offset + len may
    * overflow an unsigned int, old_len - offset cannot once offset has
    * been bounded.
    */
   if ((offset > old_len) || (len > old_len - offset))
     {
        slrn_error ("Internal Error in slrn_convert_substring");
        return NULL;                  /* internal error */
     }

   substr = slrn_convert_string (from_charset, str+offset, str+offset+len,
                                 to_charset, test);

   if (substr == NULL)
     return NULL;

   dlen = strlen (substr);
   tail_len = old_len - (offset + len);

   new_str = slrn_malloc (offset + dlen + tail_len + 1, 0, 1);
   if (new_str == NULL)
     {
        slrn_free (substr);
        return NULL;
     }

   /* All three piece lengths are known, so splice with memcpy. */
   memcpy (new_str, str, offset);
   memcpy (new_str + offset, substr, dlen);
   /* tail_len + 1 also copies the terminating NUL of str. */
   memcpy (new_str + offset + dlen, str + offset + len, tail_len + 1);

   slrn_free (substr);
   return new_str;
}
331 
/* Convert str from from_charset to to_charset if it needs converting.
 *
 * *dest receives the converted copy (owned by the caller), or NULL when no
 * conversion took place: either charset is NULL, or str holds 7-bit
 * bytes only.
 *
 * Returns 0 on success (with or without a conversion) and -1 if dest is
 * NULL or the conversion failed.
 */
int slrn_test_and_convert_string(char *str, char **dest, char *to_charset, char *from_charset)
{
   char *converted;

   if (dest == NULL)
     return -1;

   *dest = NULL;

   if ((to_charset == NULL) || (from_charset == NULL)
       || !slrn_string_nonascii(str))
     return 0;

   converted = slrn_convert_substring(str, 0, strlen (str), to_charset, from_charset, 0);
   *dest = converted;

   return (converted == NULL) ? -1 : 0;
}
350 
/* fprintf-like output to fp, converting the formatted text from
 * from_charset to to_charset.
 *
 * When either charset is NULL, or both name the same charset (compared
 * case-insensitively), the arguments go straight to vfprintf.  Otherwise
 * the text is formatted into a temporary string; it is written as is when
 * it contains 7-bit bytes only, and after conversion otherwise.
 *
 * Returns the vfprintf result on the direct path and the fputs result on
 * the other paths; -1 if the text could not be formatted or converted.
 */
int slrn_convert_fprintf(FILE *fp, char *to_charset, char *from_charset, const char *format, ... )
{
   va_list args;
   int retval;
   char *str,*tmp;

   va_start (args, format);

   if ((to_charset == NULL) || (from_charset == NULL) || (slrn_case_strcmp(to_charset, from_charset) == 0))
     {
        retval = vfprintf (fp, format, args);
        va_end (args);
        return retval;
     }

   str = slrn_strdup_vprintf(format, args);
   va_end (args);

   /* Do not scan or print a missing buffer should the formatter fail. */
   if (str == NULL)
     return -1;

   if (!slrn_string_nonascii(str))
     {
        retval = fputs (str, fp);
        slrn_free(str);
        return retval;
     }

   if (NULL == (tmp = slrn_convert_substring(str, 0, strlen (str), to_charset, from_charset, 0)))
     {
        slrn_free(str);
        return -1;
     }
   retval = fputs (tmp, fp);
   slrn_free(str);
   slrn_free(tmp);

   return retval;
}
387 
388 #ifdef HAVE_ICONV
/* Push a single "\n" through the converter cd and throw the output away.
 * slrn_convert_article calls this after each line it converts.
 * NOTE(review): presumably this keeps the converter state in step with
 * the line breaks that are not part of the line buffers -- confirm.
 */
static void iconv_convert_newline (iconv_t cd)
{
   char *discard;

   if (iconv_convert_string (cd, "\n", 1, 1, &discard) != 1)
     return;

   slrn_free (discard);
}
397 #endif
398 
399 /* converts a->lines */
slrn_convert_article(Slrn_Article_Type * a,char * to_charset,char * from_charset)400 int slrn_convert_article(Slrn_Article_Type *a, char *to_charset, char *from_charset)
401 {
402 #ifdef HAVE_ICONV
403    iconv_t cd;
404    char *tmp;
405    struct Slrn_Article_Line_Type *line=a->lines;
406 
407    if ((cd = iconv_open(to_charset, from_charset)) == (iconv_t)(-1))
408      {
409 	slrn_error (_("Can't convert %s -> %s\n"), from_charset, to_charset);
410 	return -1;
411      }
412 
413    /* Headers are handled elsewhere */
414    while ((line != NULL) && (line->flags & HEADER_LINE))
415      {
416 	line=line->next;
417      }
418 
419    while (line != NULL)
420      {
421 	if (1 == iconv_convert_string(cd, line->buf, strlen (line->buf), 0, &tmp))
422 	  {
423 	     slrn_free((char *) line->buf);
424 	     line->buf=tmp;
425 	     a->mime.was_modified=1;
426 	     iconv_convert_newline (cd);
427 	  }
428 	line=line->next;
429      }
430    iconv_close(cd);
431 #else
432    (void) a;
433    (void) to_charset;
434    (void) from_charset;
435 #endif
436    return 0;
437 }
438 
439  /* It returns 0 if it did not convert, 1 if it did, -1 upon error.
440   * Only those lines that have the 8bit flag set will be converted.
441   */
slrn_test_convert_lines(Slrn_Article_Line_Type * rlines,char * to_charset,char * from_charset,char ** badlinep)442 int slrn_test_convert_lines (Slrn_Article_Line_Type *rlines, char *to_charset, char *from_charset, char **badlinep)
443 {
444 #ifdef HAVE_ICONV
445    Slrn_Article_Line_Type *rline;
446    Slrn_Article_Line_Type *elines, *eline;
447    iconv_t cd;
448    int status;
449 
450    if ((cd = iconv_open(to_charset, from_charset)) == (iconv_t)(-1))
451      return 0;
452 
453    elines = eline = NULL;
454    rline = rlines;
455 
456    status = 0;
457    while (rline != NULL)
458      {
459 	Slrn_Article_Line_Type *next;
460 
461 	if (0 == (rline->flags & LINE_HAS_8BIT_FLAG))
462 	  {
463 	     rline = rline->next;
464 	     continue;
465 	  }
466 
467 	next = (Slrn_Article_Line_Type *) slrn_malloc (sizeof(Slrn_Article_Line_Type), 1, 1);
468 	if (next == NULL)
469 	  {
470 	     status = -1;
471 	     *badlinep = rline->buf;
472 	     goto free_return;
473 	  }
474 
475 	switch (iconv_convert_string (cd, rline->buf, strlen (rline->buf), 1, &next->buf))
476 	  {
477 	   case 1:		       /* line converted ok */
478 	     if (eline == NULL)
479 	       elines = next;
480 	     else
481 	       eline->next = next;
482 	     eline = next;
483 	     break;
484 
485 	   case 0:		       /* failed to convert */
486 	     if (Slrn_Debug_Fp != NULL)
487 	       {
488 		  (void) fprintf (Slrn_Debug_Fp, "*** iconv_convert_string failed to convert:\n");
489 		  (void) fprintf (Slrn_Debug_Fp, "%s\n", rline->buf);
490 		  (void) fprintf (Slrn_Debug_Fp, "*** from charset=%s to charset=%s\n", from_charset, to_charset);
491 		  (void) fflush (Slrn_Debug_Fp);
492 	       }
493 	     status = 0;
494 	     *badlinep = rline->buf;
495 	     slrn_art_free_line (next);
496 	     goto free_return;
497 
498 	   default:
499 	     status = -1;
500 	     *badlinep = rline->buf;
501 	     slrn_art_free_line (next);
502 	     goto free_return;
503 	  }
504 	rline=rline->next;
505      }
506 
507    /* Converted ok if we get here */
508    eline = elines;
509    rline = rlines;
510    while (rline != NULL)
511      {
512 	if (0 == (rline->flags & LINE_HAS_8BIT_FLAG))
513 	  {
514 	     rline = rline->next;
515 	     continue;
516 	  }
517 	slrn_free (rline->buf);
518 	rline->buf = eline->buf;
519 	eline->buf = NULL;
520 
521 	rline->flags &= ~LINE_HAS_8BIT_FLAG;
522 
523 	rline = rline->next;
524 	eline = eline->next;
525      }
526    status = 1;
527    /* drop */
528 
529 free_return:
530    iconv_close (cd);
531    while (elines != NULL)
532      {
533 	eline = elines;
534 	elines = elines->next;
535 	slrn_art_free_line (eline);
536      }
537    return status;
538 
539 #else
540    (void) rlines;
541    (void) to_charset;
542    (void) from_charset;
543    (void) badlinep;
544    return 1;
545 #endif
546 }
547 
548