1 /*
2  *  CHARSET.C
3  *
4  *  Written 1998 by Tobias Ernst. Released to the Public Domain.
5  *
6  *  A FSC-0054 / FSP-1013 compliant character set translation engine for MsgEd.
7  */
8 
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
13 
14 #include "addr.h"
15 #include "memextra.h"
16 #include "strextra.h"
17 #include "nedit.h"
18 #include "charset.h"
19 #include "msged.h"
20 #include "config.h"
21 
22 
23 static READWRITEMAPS *readmaps=NULL, *writemaps=NULL;
24 static int toasc_encountered=0;
25 
26 static CHARSETALIAS *aliases = NULL;
27 static int naliases = 0;
28 
29 /* The maskout_table is a lookup table that simply replaces all
30    characters with an ASCII code >= 128 with question marks. It is
31    necessary to do this translation when reading a mail without
32    charset kludge, because untranslated special characters could
33    create strange effects in the UNIX version running in an xterm. */
34 
35 static LOOKUPTABLE maskout_table;
36 
37 
/* register an alias name for a charset kludge (for backward compatibility with
   things like IBMPC, 7_FIDO or RUFIDO ...) */
40 
charset_alias(const char * from,const char * to)41 void charset_alias (const char *from, const char *to)
42 {
43     if (!naliases)
44     {
45         aliases=xmalloc(sizeof(CHARSETALIAS));
46     }
47     else
48     {
49         aliases=realloc(aliases, sizeof(CHARSETALIAS) * (1+naliases));
50     }
51     naliases++;
52     strncpy(aliases[naliases-1].from_charset, from, 9);
53     aliases[naliases-1].from_charset[8] = '\0';
54     strncpy(aliases[naliases-1].to_charset, to, 9);
55     aliases[naliases-1].to_charset[8] = '\0';
56 }
57 
findalias(const char * kludge)58 static const char *findalias(const char *kludge)
59 {
60     int i;
61 
62     for (i = 0; i < naliases; i++)
63     {
64         if (!strcmp(aliases[i].from_charset, kludge))
65             return aliases[i].to_charset;
66     }
67     return kludge;
68 }
69 
70 
read_map(const char * filename)71 READWRITEMAPS *read_map(const char *filename)
72 {
73     READWRITEMAPS *map = xmalloc(sizeof(READWRITEMAPS));
74     FILE *fp = fopen(filename, "rb");
75     char temp[64];
76     int little_endian;
77     long file_length;
78     int i;
79 
80     map->tables = NULL;
81 
82     if (fp == NULL)
83     {
84        goto cleanup;           /* file not found - exit w/o error message  */
85     }
86 
87 
88     fseek(fp, 0, SEEK_END);    /* determine if the file has the right   */
89     file_length = ftell(fp);   /* size (12 + n*256) and the number n of */
90                                /* lookup tables                         */
91     if ((file_length - 12) % (256 + 32))
92     {
93        goto error;
94     }
95 
96     map->n_tables = (int) ((file_length - 12 ) / (256 + 32));
97     fseek(fp, 0, SEEK_SET);
98 
99     if (fread(temp, 12, 1, fp) != 1)   /* read the map file header        */
100     {
101        goto error;
102     }
103 
104     little_endian = (temp[0] == 1);   /* is the file for intel or vax?    */
105     map->charset_name[8] = 0;         /* determine the character set name */
106     memmove(map->charset_name, temp + 4, 8);
107 
108                                   /* allocate room for the lookup tables  */
109     map->tables = xcalloc(map->n_tables, sizeof(LOOKUPTABLE));
110 
111                                   /* read in the individual lookup tables */
112     for (i = 0; i < map->n_tables; i++)
113     {                             /* read in the table header             */
114         if (fread(temp, 32, 1, fp) != 1)
115         {
116             goto error;
117         }
118 
119         if (temp[0] || temp[1] || temp[2] || temp[3])     /* id must be 0 */
120         {
121             goto error;
122         }
123 
124         if (little_endian)
125         {                                       /* mod rev must be 0 or 1 */
126             if ((temp[4] != 1 && temp[4] != 0 ) || (temp[5] != 0) )
127             {
128                goto error;
129             }
130             map->tables[i].level = (temp[7] << 8 ) + temp[6];
131         }
132         else
133         {                                       /* mod rev must be 0 or 1 */
134             if ((temp[5] != 1 && temp[5] != 0 ) || (temp[4] != 0) )
135             {
136                goto error;
137             }
138             map->tables[i].level = (temp[6] << 8 ) + temp[7];
139         }
140 
141         map->tables[i].from_charset[8] = 0;
142         map->tables[i].to_charset[8] = 0;
143         memmove (map->tables[i].from_charset, temp+16, 8);
144         memmove (map->tables[i].to_charset, temp+24, 8);
145         if (stricmp(map->tables[i].to_charset,"ASCII") == 0)
146         {
147            toasc_encountered = 1;
148         }
149 
150                                               /* read in the table itself */
151         if (fread(map->tables[i].lookuptable, 256, 1, fp) != 1)
152         {
153             goto error;
154         }
155     }
156 
157     fclose (fp);
158     return map;
159 
160 error:
161     fprintf (stderr,
162              "\r\aError reading %s: Unrecognized or corrupt file format.\n",
163              filename);
164 cleanup:
165     if (fp != NULL)
166     {
167         fclose(fp);
168     }
169     if (map != NULL)
170     {
171         if (map->tables != NULL)
172         {
173             xfree(map->tables);
174         }
175         xfree(map);
176     }
177     return NULL;
178 }
179 
180 
read_charset_maps(char * readmap,char * writemap)181 void read_charset_maps(char *readmap, char *writemap)
182 {
183     int i;
184     char *fnr, *fnw;
185 
186     destroy_charset_maps();
187 
188                                 /* fill in the maskout table */
189     strcpy(maskout_table.from_charset, "ASCII");
190     strcpy(maskout_table.to_charset, "ASCII");
191     maskout_table.level = 2;
192     for (i = 0; i < 128; i++)
193     {
194         maskout_table.lookuptable[i * 2] = '\001';
195         maskout_table.lookuptable[i * 2 + 1] = '?';
196     }
197 
198     fnr = shell_expand(xstrdup(readmap));
199     fnw = shell_expand(xstrdup(writemap));
200 
201     readmaps=read_map(fnr);
202     toasc_encountered=0;
203     writemaps=read_map(fnw);
204 
205     if (readmaps == NULL || writemaps == NULL)
206     {
207         fprintf (stderr,
208       "\r\aWarning: Could not open %s \"%s\".",
209                  readmaps == NULL ? "read map" : "write map",
210                  readmaps == NULL ? fnr: fnw);
211         fprintf (stderr,
212                  "\n         You should correct this before you try "
213                             "to use umlauts, cyrillic"
214                  "\n         letters, accented characters, IBM graphics etc.\n");
215     }
216 
217     if (readmaps != NULL && writemaps != NULL)
218     {
219         if (!toasc_encountered)
220         {
221            fprintf (stderr,
222                     "\r\aWarning: %s does not contain an entry for converting"
223                     "\n         back to ASCII!\n", fnw);
224         }
225         if (strcmp(readmaps->charset_name, writemaps->charset_name) == 0)
226         {
227            printf ("\rIncorporating FSP 1013 charset engine. "
228                    "Local charset is: %s\n", readmaps->charset_name);
229            xfree(fnr);
230            xfree(fnw);
231            return;
232         }
233         else
234         {
235            fprintf (stderr, "\rError: readmaps.dat and writmaps.dat "
236                     "do not correspond in primary charset name.\n");
237         }
238     }
239     destroy_charset_maps();
240     xfree(fnr);
241     xfree(fnw);
242     return;
243 }
244 
destroy_charset_maps(void)245 void destroy_charset_maps(void)
246 {
247     if (readmaps!=NULL)
248     {
249         xfree(readmaps->tables);
250         xfree(readmaps);
251     }
252     if (writemaps!=NULL)
253     {
254         xfree(writemaps->tables);
255         xfree(writemaps);
256     }
257     readmaps=writemaps=NULL;
258 }
259 
260 
261 
/* Find a lookup table. Note: a NULL pointer means no translation has to be
   done. If you specify an unknown charset name, you will not get a NULL
   pointer, but you will get the maskout table (which maps every character
   >= 128 to a question mark). If you do not want this, use have_readtable
   to test! */
266 
get_readtable(const char * charset_name,int level)267 LOOKUPTABLE *get_readtable (const char *charset_name, int level)
268 {
269     int i;
270 
271     charset_name = findalias(charset_name);
272 
273     if (readmaps == NULL)
274     {
275         return NULL;
276     }
277     if (!strcmp(readmaps->charset_name, charset_name))
278     {
279         return NULL;  /* no translation necessary */
280     }
281     for (i = 0; i < readmaps->n_tables; i++)  /* find an ideal table */
282     {
283         if (readmaps->tables[i].level == level &&
284             !strcmp(readmaps->tables[i].from_charset, charset_name) &&
285             !strcmp(readmaps->tables[i].to_charset, readmaps->charset_name) )
286         {
287             return readmaps->tables+i;
288         }
289     }
290     for (i = 0; i < readmaps->n_tables; i++)  /* find a table that at least */
291     {                                         /* translates to 7 bit ASCII  */
292         if (readmaps->tables[i].level == level &&
293             !strcmp(readmaps->tables[i].from_charset, charset_name) &&
294             !strcmp(readmaps->tables[i].to_charset, "ASCII") )
295         {
296             return readmaps->tables+i;
297         }
298     }
299     return &maskout_table;  /* can't help - mask out all characters >= 128 */
300 }
301 
get_writetable(const char * charset_name,int * allowed)302 LOOKUPTABLE * get_writetable(const char *charset_name, int *allowed)
303 {
304     int i;
305 
306     charset_name = findalias(charset_name);
307 
308     if (writemaps == NULL)
309     {
310         *allowed=0;
311         return NULL;
312     }
313     if (charset_name != NULL)
314     {
315         if (!strcmp(writemaps->charset_name, charset_name))
316         {
317             *allowed=1;
318             return NULL;  /* no translation necessary */
319         }
320         for (i = 0; i < writemaps->n_tables; i++)  /* find an ideal table */
321         {
322             if (!strcmp(writemaps->tables[i].to_charset, charset_name) &&
323                 !strcmp(writemaps->tables[i].from_charset,
324                         writemaps->charset_name) )
325             {
326                 *allowed=1;
327                 return writemaps->tables+i;
328             }
329         }
330     }
331     for (i = 0; i < writemaps->n_tables; i++) /* find a to ASCII table */
332     {
333         if (!strcmp(writemaps->tables[i].to_charset, "ASCII")&&
334             !strcmp(writemaps->tables[i].from_charset,
335                     writemaps->charset_name))
336         {
337             *allowed = 0; /* don't write CHRS kludge if we translate
338                              to ASCII anyway */
339             return writemaps->tables+i;
340         }
341     }
342     *allowed=0;
343     return NULL;  /* can't help */
344 }
345 
346 
347 
348 /* Test if we have a read table for this charset */
349 
have_readtable(const char * charset_name,int level)350 int have_readtable (const char *charset_name, int level)
351 {
352     return get_readtable(charset_name, level) != &maskout_table;
353 }
354 
355 
356 
357 /* this routine filters out control codes that could break vt100 */
358 
/*
 * Replace, in place, every byte below 32 (except '\n', '\r' and '\001') and
 * every byte in the range 128..159 with a question mark. A NULL argument is
 * ignored. The function body is only compiled when UNIX or SASC is defined;
 * otherwise the text is left untouched.
 */
void strip_control_chars (char *text)
{
#if defined(UNIX) || defined(SASC)

    unsigned char *cursor;

    if (text == NULL)
    {
        return;
    }

    /* the replacement is never NUL, so walking up to the terminator visits
       exactly the bytes of the original string */
    for (cursor = (unsigned char *) text; *cursor != '\0'; cursor++)
    {
        int low_control  = *cursor < 32 && *cursor != '\n' &&
                           *cursor != '\r' && *cursor != '\001';
        int high_control = *cursor >= 128 && *cursor < 160;

        if (low_control || high_control)
        {
            *cursor = '?';
        }
    }
#endif
}
381 
/* Enlarge the output buffer of translate_text by 40 bytes. On success the
   new buffer is returned and *capacity updated; on failure the old buffer
   is freed and NULL is returned. */
static char *grow_translation(char *buf, size_t *capacity)
{
    char *bigger = realloc(buf, (*capacity + 40) + 1);

    if (bigger == NULL)
    {
        free(buf);
        return NULL;
    }
    *capacity += 40;
    return bigger;
}

/*
 * Translate `text` through `table` into a newly allocated string.
 *
 * text:  NUL terminated input; NULL yields NULL.
 * table: lookup table to apply; NULL yields a plain copy of `text`.
 *
 * A level 1 table is applied to bytes below 128, a level 2 table to bytes
 * from 128 upwards; all other bytes (and all bytes for any other level) are
 * copied unchanged. Each table entry is a byte pair: if the first byte is 0
 * or 1, the second byte is emitted unless it is 0; if the first byte is in
 * 2..32 (reserved), nothing is emitted; otherwise both bytes are emitted.
 *
 * Returns the translated string, which the caller has to free, or NULL if
 * `text` is NULL or the output buffer could not be enlarged.
 */
char *translate_text (const char *text, LOOKUPTABLE *table)
{
    size_t orglength, maxlength;
    size_t srcidx, dstidx;
    char *translated;
    unsigned char tblidx;
    unsigned char first;

    if (text == NULL)
    {
        return NULL;
    }

    orglength = strlen(text);
    maxlength = orglength;     /* at first, we assume 1:1 translation */
    translated = xmalloc(maxlength + 1);

    if (table == NULL) /* no translation necessary or possible */
    {
        memcpy(translated, text, orglength + 1);   /* includes the NUL */
        return translated;
    }

    for (srcidx = dstidx = 0; srcidx < orglength; srcidx++)
    {
        if (dstidx >= maxlength)
        {
            translated = grow_translation(translated, &maxlength);
            if (translated == NULL)
            {
                return NULL;
            }
        }

        switch (table->level)
        {
        case 1:
            if (text[srcidx] & 0x80)   /* can't help here */
            {
                translated[dstidx++] = text[srcidx];
                continue;
            }
            tblidx = (unsigned char) text[srcidx] * 2;
            break; /* case */
        case 2:
            if (!(text[srcidx] & 0x80))   /* nothing to do here */
            {
                translated[dstidx++] = text[srcidx];
                continue;
            }
            tblidx = ((unsigned char) text[srcidx] - 128) * 2;
            break; /* case */
        default:  /* other levels are not implemented */
            translated[dstidx++] = text[srcidx];
            continue; /* for */
        }

        /* classify on the unsigned value so that the result does not depend
           on whether the table bytes are stored as signed or unsigned */
        first = (unsigned char) table->lookuptable[tblidx];

        if (first <= 1)
        {
            /* single byte replacement; a zero byte means "emit nothing" */
            if (table->lookuptable[tblidx + 1])
            {
                translated[dstidx++] = table->lookuptable[tblidx + 1];
            }
        }
        else if (first > 32)   /* not a reserved character */
        {
            /* two byte replacement; room for the first byte was ensured
               at the top of the loop, the second may need more space */
            translated[dstidx++] = table->lookuptable[tblidx];
            if (dstidx >= maxlength)
            {
                translated = grow_translation(translated, &maxlength);
                if (translated == NULL)
                {
                    return NULL;
                }
            }
            translated[dstidx++] = table->lookuptable[tblidx + 1];
        }
        /* first byte in 2..32: reserved, the character is dropped */
    }

    assert(dstidx <= maxlength);
    translated[dstidx] = 0;

    return translated;
}
466 
467 
/*
 * Extract the code page number from a charset kludge name.
 *
 * The name is first resolved through the alias table. If the resolved name
 * begins with "CP", whatever follows is converted with atoi() and returned;
 * any other name yields 0.
 */
int get_codepage_number(const char *kludge_name)
{
    const char *resolved = findalias(kludge_name);

    if (strncmp(resolved, "CP", 2) != 0)
    {
        return 0;
    }
    return atoi(resolved + 2);
}
477 
get_local_charset(void)478 char *get_local_charset(void)
479 {
480     static char buffer[20];
481 
482     if (readmaps == NULL)
483     {
484         return NULL;
485     }
486 
487     sprintf (buffer, "%s 2", readmaps->charset_name);
488     return buffer;
489 }
490 
/* qsort() callback: orders two table entries, each a NUL terminated string,
   by case-insensitive comparison. */
static int ct_comparator(const void *p1, const void *p2)
{
    const char *lhs = p1;
    const char *rhs = p2;

    return stricmp(lhs, rhs);
}
495 
/* This function gets a human readable list of the character sets for which we
   have read maps available. It can be used by the calling program to display a
   list of these character sets, e.g. when offering the user the possibility to
   override a character set kludge in the mail and to manually select the read
   map to use.
501 
502    nelem and elem_size must not be NULL; they will be filled in with the number
503    of elements in the list and the size of each element (including a trailing
504    \0), respectively.
505 
506    The pointer that is returned has to be free'ed by the program.
507 */
508 
get_known_charset_table(int * nelem,int * elem_size)509 char *get_known_charset_table(int *nelem, int *elem_size)
510 {
511     char *array;
512     int i;
513 
514     if (nelem == NULL || elem_size == NULL || readmaps == NULL ||
515         readmaps->tables == NULL || readmaps->n_tables <= 0)
516     {
517         return NULL;
518     }
519 
520     *elem_size = 9 + 1 + 1; /* name, space, level */
521     *nelem = readmaps->n_tables;
522     array = malloc(((*nelem) + 1)* (*elem_size));
523 
524     for (i = 0; i < (*nelem); i++)
525     {
526         sprintf(array + i * (*elem_size), "%s %d",
527                 readmaps->tables[i].from_charset,
528                 readmaps->tables[i].level);
529     }
530 
531     sprintf (array + (*nelem) * (*elem_size), "%s 2",
532              readmaps->charset_name);
533     (*nelem)++;
534 
535     qsort(array, *nelem, *elem_size, ct_comparator);
536 
537     /* filter out duplicates */
538 
539     for (i = 0; i < (*nelem) - 1; i++)
540     {
541         if (!stricmp(array + i * (*elem_size),
542                      array + (i + 1) * (*elem_size)))
543         {
544             memmove(array + i * (*elem_size),
545                     array + (i + 1) * (*elem_size),
546                     ((*nelem) - i - 1) * (*elem_size));
547             (*nelem)--;
548         }
549     }
550 
551     return array;
552 }
553 
554 
555 
556