1 /*:ts=8*/
2 /*****************************************************************************
3  * FIDOGATE --- Gateway UNIX Mail/News <-> FTN NetMail/EchoMail
4  *
5  *
6  * NEW charset.c code using charset.bin mapping file
7  *
8  *****************************************************************************
9  * Copyright (C) 1990-2001
10  *  _____ _____
11  * |     |___  |   Martin Junius             FIDO:      2:2452/110
12  * | | | |   | |   Radiumstr. 18             Internet:  mj@fido.de
13  * |_|_|_|@home|   D-51069 Koeln, Germany
14  *
15  * This file is part of FIDOGATE.
16  *
17  * FIDOGATE is free software; you can redistribute it and/or modify it
18  * under the terms of the GNU General Public License as published by the
19  * Free Software Foundation; either version 2, or (at your option) any
20  * later version.
21  *
22  * FIDOGATE is distributed in the hope that it will be useful, but
23  * WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with FIDOGATE; see the file COPYING.  If not, write to the Free
29  * Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
30  *****************************************************************************/
31 
32 #include "fidogate.h"
33 
34 /* New code reuses original structures */
35 
/* fidogate.conf aliases to canonical fsc charset name */
/* Head of the alias list; entries are pushed by charset_fsc_aliases_add() */
static CharsetAlias *fsc_aliases;
/* Head of the RFC -> FSC name list; entries are pushed by charset_name_map_add() */
static CharsetAlias *charset_name_map;

/* Charset pair used by xlat_s(); both stay NULL until charset_set_in_out() */
static char *orig_in;
static char *orig_out;
42 
43 /*
44  * Translate string
45  */
xlat_s(char * s1,char * s2)46 char *xlat_s(char *s1, char *s2)
47 {
48     char *dst = NULL;
49     size_t src_len;
50     size_t dst_len;
51     int rc;
52 
53     if (s2 != NULL)
54         free(s2);
55 
56     if (s1 == NULL)
57         return NULL;
58 
59     src_len = strlen(s1);
60     src_len++;                  /* recode also final \0 */
61 
62     rc = charset_recode_buf(&dst, &dst_len, s1, src_len, orig_in, orig_out);
63     if (rc == OK)
64         return dst;
65 
66     free(dst);
67     return NULL;
68 }
69 
70 /*
71  * Set character mapping table
72  */
charset_set_in_out(char * in,char * out)73 void charset_set_in_out(char *in, char *out)
74 {
75     if (!in || !out)
76         return;
77 
78     debug(5, "charset: in=%s out=%s", in, out);
79 
80     orig_in = in;
81     orig_out = out;
82 }
83 
charset_fsc_aliases_add(char ** list)84 static void charset_fsc_aliases_add(char **list)
85 {
86     char *name = *list++;
87     char *alias;
88     CharsetAlias *p;
89 
90     for (alias = *list; alias; alias = *++list) {
91         p = xmalloc(sizeof(*p));
92         snprintf(p->name, sizeof(p->name), "%s", name);
93         snprintf(p->alias, sizeof(p->alias), "%s", alias);
94 
95         p->next = fsc_aliases;
96         fsc_aliases = p;
97 
98         debug(15, "Adding FSC alias %s -> %s\n", alias, name);
99     }
100 }
101 
charset_fsc_canonize(char * chrs)102 char *charset_fsc_canonize(char *chrs)
103 {
104     CharsetAlias *p;
105 
106     for (p = fsc_aliases; p; p = p->next) {
107         if (streq(chrs, p->alias))
108             return p->name;
109     }
110     return chrs;
111 }
112 
charset_fsc_aliases_init(void)113 static void charset_fsc_aliases_init(void)
114 {
115     int first = TRUE, next = FALSE;
116     char *p;
117     char **list = NULL;
118 
119     for (p = cf_get_string("CharsetAliasesFSC", first);
120          p; p = cf_get_string("CharsetAliasesFSC", next)) {
121 
122         list_init(&list, p);
123 
124         if (*list == NULL) {
125             fglog("ERROR: CharsetAliasesFSC requires <name> <list>\n");
126             continue;
127         }
128 
129         charset_fsc_aliases_add(list);
130     }
131 
132     list_free(list);
133 }
134 
charset_name_rfc2ftn(char * chrs)135 char *charset_name_rfc2ftn(char *chrs)
136 {
137     CharsetAlias *p;
138 
139     for (p = charset_name_map; p; p = p->next) {
140         if (streq(chrs, p->name))
141             return p->alias;
142     }
143     return chrs;
144 }
145 
charset_name_map_add(char ** list)146 static void charset_name_map_add(char **list)
147 {
148     char *rfc = list[0];
149     char *fsc = list[1];
150     CharsetAlias *p;
151 
152     p = xmalloc(sizeof(*p));
153     snprintf(p->name, sizeof(p->name), "%s", rfc);
154     snprintf(p->alias, sizeof(p->alias), "%s", fsc);
155 
156     p->next = charset_name_map;
157     charset_name_map = p;
158 
159     debug(15, "Adding charset name map %s -> %s\n", rfc, fsc);
160 }
161 
charset_name_map_init(void)162 static void charset_name_map_init(void)
163 {
164     int first = TRUE, next = FALSE;
165     char *p;
166     char **list = NULL;
167 
168     for (p = cf_get_string("CharsetNameMap", first);
169          p; p = cf_get_string("CharsetNameMap", next)) {
170 
171         list_init(&list, p);
172 
173         if ((list[0] == NULL) || (list[1] == 0)) {
174             fglog("ERROR: Syntax CharsetNameMap <RFC name> <FSC name>\n");
175             continue;
176         }
177 
178         charset_name_map_add(list);
179     }
180 
181     list_free(list);
182 }
183 
/*
 * Initialize charset mapping
 *
 * Builds both configuration-driven lists: first the FSC alias list,
 * then the RFC -> FSC charset name map.
 */
void charset_init(void)
{
    charset_fsc_aliases_init();
    charset_name_map_init();
}
192 
/* Simple string -> string association */
struct str_to_str {
    char *key;
    char *val;
};

/*
 * Level 1 charset identifiers (as they appear in the ^ACHRS kludge) and
 * the charset name handed back for them by charset_level1_to_iconv().
 *
 * NOTE(review): DUTCH maps to ISO646-DK and SWISS to ISO646-CN; DK and CN
 * usually denote the Danish and Chinese variants -- confirm that these
 * two entries are intentional.
 */
static struct str_to_str level1_map[] = {
    { .key = "ASCII",    .val = "ASCII" },
    { .key = "DUTCH",    .val = "ISO646-DK" },
    { .key = "FINNISH",  .val = "ISO646-FI" },
    { .key = "FRENCH",   .val = "ISO646-FR" },
    { .key = "CANADIAN", .val = "ISO646-CA" },
    { .key = "GERMAN",   .val = "ISO646-DE" },
    { .key = "ITALIAN",  .val = "ISO646-IT" },
    { .key = "NORWEIG",  .val = "ISO646-NO" },
    { .key = "PORTU",    .val = "ISO646-PT" },
    { .key = "SPANISH",  .val = "ISO646-ES" },
    { .key = "SWEDISH",  .val = "ISO646-SE" },
    { .key = "SWISS",    .val = "ISO646-CN" },
    { .key = "UK",       .val = "ISO646-GB" },
};
213 
charset_level1_to_iconv(char * charset)214 static char *charset_level1_to_iconv(char *charset)
215 {
216     int i;
217 
218     for (i = 0; i < sizeof(level1_map) / sizeof(level1_map[0]); i++)
219         if (stricmp(level1_map[i].key, charset) == 0)
220             return level1_map[i].val;
221     return NULL;
222 }
223 
224 /*
225  * Get charset name from ^ACHRS kludge line
226  */
charset_chrs_name(char * s)227 char *charset_chrs_name(char *s)
228 {
229     static char name[MAXPATH];
230     char *p;
231     int level;
232 
233     while (is_space(*s))
234         s++;
235     debug(5, "FSC-0054 ^ACHRS/CHARSET: %s", s);
236 
237     BUF_COPY(name, s);
238     p = strtok(name, " \t");
239     if (!p)
240         return NULL;
241 
242     p = strtok(NULL, " \t");
243     if (!p)
244         /* In this case it's an FSC-0050 kludge without the class code.
245          * Treat it like FSC-0054 level 2. */
246         level = 2;
247     else
248         level = atoi(p);
249 
250     switch (level) {
251     case 1:
252         p = charset_level1_to_iconv(name);
253         debug(5, "FSC-0054 level 1 charset=%s (level 2: %s)", name, p);
254         return p;
255 
256     default:
257         debug(5, "FSC-0054 level %d charset=%s", level, name);
258         return name;
259     }
260 
261     return NULL;
262 }
263 
charset_fsc_aliases_free(void)264 static void charset_fsc_aliases_free(void)
265 {
266     CharsetAlias *pa, *pa1;
267 
268     for (pa = fsc_aliases; pa; pa = pa1) {
269         pa1 = pa->next;
270         free(pa);
271     }
272 }
273 
charset_name_map_free(void)274 static void charset_name_map_free(void)
275 {
276     CharsetAlias *pa, *pa1;
277 
278     for (pa = charset_name_map; pa; pa = pa1) {
279         pa1 = pa->next;
280         free(pa);
281     }
282 }
283 
/*
 * Release the lists built by charset_init(): the FSC alias list first,
 * then the RFC -> FSC charset name map.
 */
void charset_free(void)
{
    charset_fsc_aliases_free();
    charset_name_map_free();
}
289 
_charset_recode_iconv(char ** res,size_t * res_len,char * src,size_t src_len,char * from,char * _to)290 static int _charset_recode_iconv(char **res, size_t *res_len,
291                                  char *src, size_t src_len,
292                                  char *from, char *_to)
293 {
294     int rc;
295     iconv_t desc;
296     size_t size;
297     char *to;
298     char *dst;
299     size_t dst_size;
300     size_t inc = src_len;
301     char *cur;
302     size_t cur_size;
303     size_t dst_len;             /* successfuly converted to dst */
304 
305     debug(6, "Using ICONV");
306 
307     size = strlen(_to) + sizeof("//TRANSLIT");
308     to = xmalloc(size);
309     sprintf(to, "%s//TRANSLIT", _to);
310 
311     desc = iconv_open(to, from);
312     if (desc == (iconv_t) - 1) {
313         debug(6, "WARNING: iconv cannot convert from %s to %s", from, to);
314         return ERROR;
315     }
316 
317     dst_size = src_len;
318     dst = xmalloc(dst_size);
319     cur = dst;
320     cur_size = dst_size;
321 
322     while (src_len > 0) {
323         rc = iconv(desc, &src, &src_len, &cur, &cur_size);
324         if (rc != -1)
325             break;
326 
327         if (errno != E2BIG) {
328             src++;
329             src_len--;
330             *cur++ = '?';
331             cur_size--;
332             continue;
333         }
334 
335         /* after iconv call cur_size contains size of unused space */
336         dst_len = dst_size - cur_size;
337         dst = xrealloc(dst, dst_size + inc);
338         dst_size += inc;
339         cur = dst + dst_len;
340         /* unused + new */
341         cur_size += inc;
342     }
343 
344     /*
345      * write sequence to get to the initial state if needed
346      * https://www.gnu.org/software/libc/manual/html_node/iconv-Examples.html
347      */
348     iconv(desc, NULL, NULL, &cur, &cur_size);
349     dst_len = dst_size - cur_size;
350     iconv_close(desc);
351     free(to);
352 
353     *res = dst;
354     *res_len = dst_len;
355 
356     return OK;
357 }
358 
charset_recode_iconv(char ** dst,size_t * dstlen,char * src,size_t srclen,char * from,char * to)359 static int charset_recode_iconv(char **dst, size_t *dstlen,
360                                 char *src, size_t srclen, char *from, char *to)
361 {
362     int rc;
363     char *p;
364     char *buf;
365     size_t len;
366     size_t off;
367 
368     rc = _charset_recode_iconv(dst, dstlen, src, srclen, from, to);
369     if (rc == OK)
370         return OK;
371 
372     /* Heuristic, LATIN-1 -> LATIN1 */
373     p = strchr(from, '-');
374     if (p == NULL)
375         return ERROR;
376 
377     off = p - from;
378     len = strlen(from);
379 
380     buf = xmalloc(len + 1);
381     memcpy(buf, from, off);
382     memcpy(buf + off, p + 1, len - off - 1);
383     buf[len - 1] = '\0';
384 
385     rc = _charset_recode_iconv(dst, dstlen, src, srclen, buf, to);
386     free(buf);
387 
388     return rc;
389 }
390 
391 /*
392  * Gets source buffer, lenght of it, allocates buffer for the result.
393  * Return dst -- allocated buffer
394  *        dstlen -- number of used bytes in it
395  * The argument's order is like in str/mem functions
396  *
397  * Adjusts given length to string's length
398  */
charset_recode_buf(char ** dst,size_t * dstlen,char * src,size_t srclen,char * from,char * to)399 int charset_recode_buf(char **dst, size_t *dstlen,
400                        char *src, size_t srclen, char *from, char *to)
401 {
402     if (src == NULL || dst == NULL)
403         return ERROR;
404 
405     if (srclen == 0)
406         return ERROR;
407 
408     debug(6, "mime charset: recoding from %s to %s", from, to);
409 
410     if (strieq(from, to)) {
411         *dst = xmalloc(srclen);
412         memcpy(*dst, src, srclen);
413         *dstlen = srclen;
414         return OK;
415     }
416 
417     return charset_recode_iconv(dst, dstlen, src, srclen, from, to);
418 }
419 
charset_is_7bit(char * buffer,size_t len)420 int charset_is_7bit(char *buffer, size_t len)
421 {
422     int i;
423 
424     if (buffer == NULL)
425         return TRUE;
426 
427     for (i = 0; i < len; i++)
428         if (buffer[i] & 0x80)
429             return FALSE;
430     return TRUE;
431 }
432 
/* Result codes of the UTF-8 scanner helpers */
enum utf8_state {
    START_SEQ,
    PROCESS_SEQ,
    FINISH,
    ERR,
};

/*
 * Classify the first byte of a UTF-8 sequence.
 *
 * On success stores the total length of the sequence (1..4 bytes) in *n
 * and returns PROCESS_SEQ.  Returns ERR, leaving *n untouched, if c can
 * not start a sequence.
 */
static enum utf8_state utf8_check_start(unsigned char c, size_t *n)
{
    if ((c & 0x80) == 0x00)         /* 0xxxxxxx */
        *n = 1;
    else if ((c & 0xe0) == 0xc0)    /* 110xxxxx */
        *n = 2;
    else if ((c & 0xf0) == 0xe0)    /* 1110xxxx */
        *n = 3;
    else if ((c & 0xf8) == 0xf0)    /* 11110xxx */
        *n = 4;
    else
        return ERR;

    return PROCESS_SEQ;
}

/* A continuation byte has the form 10xxxxxx */
static bool utf8_check_rest_byte(unsigned char c)
{
    return (c & 0xc0) == 0x80;
}

/*
 * Check that s[i] .. s[i + num - 1] all lie inside the buffer (before
 * len and before a \0) and are continuation bytes.
 */
static bool utf8_check_rest_bytes(char *s, size_t len, size_t i, size_t num)
{
    for (; num > 0; num--, i++) {
        /* test the bound first: s[len] must not be read */
        if (i >= len || s[i] == '\0')
            return false;
        if (!utf8_check_rest_byte((unsigned char)s[i]))
            return false;
    }
    return true;
}

/*
 * Check whether s is structurally valid UTF-8.
 *
 * At most len bytes are examined; scanning also stops at the first \0.
 * Every sequence must start with a valid lead byte followed by the
 * right number of continuation bytes, all within the examined range.
 * Overlong encodings, surrogates and code points above U+10FFFF are
 * not rejected.
 *
 * A NULL or empty buffer is considered valid.
 */
bool charset_is_valid_utf8(char *s, size_t len)
{
    size_t i = 0;

    if (s == NULL)
        return true;

    /* bound check comes first so that s[len] is never read */
    while (i < len && s[i] != '\0') {
        size_t num;

        if (utf8_check_start((unsigned char)s[i], &num) != PROCESS_SEQ)
            return false;

        /* lead byte accepted, num - 1 continuation bytes must follow */
        i++;
        if (!utf8_check_rest_bytes(s, len, i, num - 1))
            return false;
        i += num - 1;
    }

    return true;
}
510