1 /*
2 * CHARSET.C
3 *
4 * Written 1998 by Tobias Ernst. Released to the Public Domain.
5 *
6 * A FSC-0054 / FSP-1013 compliant character set translation engine for MsgEd.
7 */
8
9 #include <time.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <assert.h>
13
14 #include "addr.h"
15 #include "memextra.h"
16 #include "strextra.h"
17 #include "nedit.h"
18 #include "charset.h"
19 #include "msged.h"
20 #include "config.h"
21
22
23 static READWRITEMAPS *readmaps=NULL, *writemaps=NULL;
24 static int toasc_encountered=0;
25
26 static CHARSETALIAS *aliases = NULL;
27 static int naliases = 0;
28
29 /* The maskout_table is a lookup table that simply replaces all
30 characters with an ASCII code >= 128 with question marks. It is
31 necessary to do this translation when reading a mail without
32 charset kludge, because untranslated special characters could
33 create strange effects in the UNIX version running in an xterm. */
34
35 static LOOKUPTABLE maskout_table;
36
37
38 /* register an alias name for a charset kludge (for backward compatibility with
39 things like IBMPC, 7_FIDO or RUFIDO ... */
40
charset_alias(const char * from,const char * to)41 void charset_alias (const char *from, const char *to)
42 {
43 if (!naliases)
44 {
45 aliases=xmalloc(sizeof(CHARSETALIAS));
46 }
47 else
48 {
49 aliases=realloc(aliases, sizeof(CHARSETALIAS) * (1+naliases));
50 }
51 naliases++;
52 strncpy(aliases[naliases-1].from_charset, from, 9);
53 aliases[naliases-1].from_charset[8] = '\0';
54 strncpy(aliases[naliases-1].to_charset, to, 9);
55 aliases[naliases-1].to_charset[8] = '\0';
56 }
57
findalias(const char * kludge)58 static const char *findalias(const char *kludge)
59 {
60 int i;
61
62 for (i = 0; i < naliases; i++)
63 {
64 if (!strcmp(aliases[i].from_charset, kludge))
65 return aliases[i].to_charset;
66 }
67 return kludge;
68 }
69
70
read_map(const char * filename)71 READWRITEMAPS *read_map(const char *filename)
72 {
73 READWRITEMAPS *map = xmalloc(sizeof(READWRITEMAPS));
74 FILE *fp = fopen(filename, "rb");
75 char temp[64];
76 int little_endian;
77 long file_length;
78 int i;
79
80 map->tables = NULL;
81
82 if (fp == NULL)
83 {
84 goto cleanup; /* file not found - exit w/o error message */
85 }
86
87
88 fseek(fp, 0, SEEK_END); /* determine if the file has the right */
89 file_length = ftell(fp); /* size (12 + n*256) and the number n of */
90 /* lookup tables */
91 if ((file_length - 12) % (256 + 32))
92 {
93 goto error;
94 }
95
96 map->n_tables = (int) ((file_length - 12 ) / (256 + 32));
97 fseek(fp, 0, SEEK_SET);
98
99 if (fread(temp, 12, 1, fp) != 1) /* read the map file header */
100 {
101 goto error;
102 }
103
104 little_endian = (temp[0] == 1); /* is the file for intel or vax? */
105 map->charset_name[8] = 0; /* determine the character set name */
106 memmove(map->charset_name, temp + 4, 8);
107
108 /* allocate room for the lookup tables */
109 map->tables = xcalloc(map->n_tables, sizeof(LOOKUPTABLE));
110
111 /* read in the individual lookup tables */
112 for (i = 0; i < map->n_tables; i++)
113 { /* read in the table header */
114 if (fread(temp, 32, 1, fp) != 1)
115 {
116 goto error;
117 }
118
119 if (temp[0] || temp[1] || temp[2] || temp[3]) /* id must be 0 */
120 {
121 goto error;
122 }
123
124 if (little_endian)
125 { /* mod rev must be 0 or 1 */
126 if ((temp[4] != 1 && temp[4] != 0 ) || (temp[5] != 0) )
127 {
128 goto error;
129 }
130 map->tables[i].level = (temp[7] << 8 ) + temp[6];
131 }
132 else
133 { /* mod rev must be 0 or 1 */
134 if ((temp[5] != 1 && temp[5] != 0 ) || (temp[4] != 0) )
135 {
136 goto error;
137 }
138 map->tables[i].level = (temp[6] << 8 ) + temp[7];
139 }
140
141 map->tables[i].from_charset[8] = 0;
142 map->tables[i].to_charset[8] = 0;
143 memmove (map->tables[i].from_charset, temp+16, 8);
144 memmove (map->tables[i].to_charset, temp+24, 8);
145 if (stricmp(map->tables[i].to_charset,"ASCII") == 0)
146 {
147 toasc_encountered = 1;
148 }
149
150 /* read in the table itself */
151 if (fread(map->tables[i].lookuptable, 256, 1, fp) != 1)
152 {
153 goto error;
154 }
155 }
156
157 fclose (fp);
158 return map;
159
160 error:
161 fprintf (stderr,
162 "\r\aError reading %s: Unrecognized or corrupt file format.\n",
163 filename);
164 cleanup:
165 if (fp != NULL)
166 {
167 fclose(fp);
168 }
169 if (map != NULL)
170 {
171 if (map->tables != NULL)
172 {
173 xfree(map->tables);
174 }
175 xfree(map);
176 }
177 return NULL;
178 }
179
180
read_charset_maps(char * readmap,char * writemap)181 void read_charset_maps(char *readmap, char *writemap)
182 {
183 int i;
184 char *fnr, *fnw;
185
186 destroy_charset_maps();
187
188 /* fill in the maskout table */
189 strcpy(maskout_table.from_charset, "ASCII");
190 strcpy(maskout_table.to_charset, "ASCII");
191 maskout_table.level = 2;
192 for (i = 0; i < 128; i++)
193 {
194 maskout_table.lookuptable[i * 2] = '\001';
195 maskout_table.lookuptable[i * 2 + 1] = '?';
196 }
197
198 fnr = shell_expand(xstrdup(readmap));
199 fnw = shell_expand(xstrdup(writemap));
200
201 readmaps=read_map(fnr);
202 toasc_encountered=0;
203 writemaps=read_map(fnw);
204
205 if (readmaps == NULL || writemaps == NULL)
206 {
207 fprintf (stderr,
208 "\r\aWarning: Could not open %s \"%s\".",
209 readmaps == NULL ? "read map" : "write map",
210 readmaps == NULL ? fnr: fnw);
211 fprintf (stderr,
212 "\n You should correct this before you try "
213 "to use umlauts, cyrillic"
214 "\n letters, accented characters, IBM graphics etc.\n");
215 }
216
217 if (readmaps != NULL && writemaps != NULL)
218 {
219 if (!toasc_encountered)
220 {
221 fprintf (stderr,
222 "\r\aWarning: %s does not contain an entry for converting"
223 "\n back to ASCII!\n", fnw);
224 }
225 if (strcmp(readmaps->charset_name, writemaps->charset_name) == 0)
226 {
227 printf ("\rIncorporating FSP 1013 charset engine. "
228 "Local charset is: %s\n", readmaps->charset_name);
229 xfree(fnr);
230 xfree(fnw);
231 return;
232 }
233 else
234 {
235 fprintf (stderr, "\rError: readmaps.dat and writmaps.dat "
236 "do not correspond in primary charset name.\n");
237 }
238 }
239 destroy_charset_maps();
240 xfree(fnr);
241 xfree(fnw);
242 return;
243 }
244
destroy_charset_maps(void)245 void destroy_charset_maps(void)
246 {
247 if (readmaps!=NULL)
248 {
249 xfree(readmaps->tables);
250 xfree(readmaps);
251 }
252 if (writemaps!=NULL)
253 {
254 xfree(writemaps->tables);
255 xfree(writemaps);
256 }
257 readmaps=writemaps=NULL;
258 }
259
260
261
262 /* Find a lookup table. Note: NULL pointer means no translation has to be
263 done. If you specify an unknown charset name, you will not get NULL
264 pointer, but you will get the maskout table (which maps everything to
265 a questionmark). If you do not want this, use have_readtable to test! */
266
get_readtable(const char * charset_name,int level)267 LOOKUPTABLE *get_readtable (const char *charset_name, int level)
268 {
269 int i;
270
271 charset_name = findalias(charset_name);
272
273 if (readmaps == NULL)
274 {
275 return NULL;
276 }
277 if (!strcmp(readmaps->charset_name, charset_name))
278 {
279 return NULL; /* no translation necessary */
280 }
281 for (i = 0; i < readmaps->n_tables; i++) /* find an ideal table */
282 {
283 if (readmaps->tables[i].level == level &&
284 !strcmp(readmaps->tables[i].from_charset, charset_name) &&
285 !strcmp(readmaps->tables[i].to_charset, readmaps->charset_name) )
286 {
287 return readmaps->tables+i;
288 }
289 }
290 for (i = 0; i < readmaps->n_tables; i++) /* find a table that at least */
291 { /* translates to 7 bit ASCII */
292 if (readmaps->tables[i].level == level &&
293 !strcmp(readmaps->tables[i].from_charset, charset_name) &&
294 !strcmp(readmaps->tables[i].to_charset, "ASCII") )
295 {
296 return readmaps->tables+i;
297 }
298 }
299 return &maskout_table; /* can't help - mask out all characters >= 128 */
300 }
301
get_writetable(const char * charset_name,int * allowed)302 LOOKUPTABLE * get_writetable(const char *charset_name, int *allowed)
303 {
304 int i;
305
306 charset_name = findalias(charset_name);
307
308 if (writemaps == NULL)
309 {
310 *allowed=0;
311 return NULL;
312 }
313 if (charset_name != NULL)
314 {
315 if (!strcmp(writemaps->charset_name, charset_name))
316 {
317 *allowed=1;
318 return NULL; /* no translation necessary */
319 }
320 for (i = 0; i < writemaps->n_tables; i++) /* find an ideal table */
321 {
322 if (!strcmp(writemaps->tables[i].to_charset, charset_name) &&
323 !strcmp(writemaps->tables[i].from_charset,
324 writemaps->charset_name) )
325 {
326 *allowed=1;
327 return writemaps->tables+i;
328 }
329 }
330 }
331 for (i = 0; i < writemaps->n_tables; i++) /* find a to ASCII table */
332 {
333 if (!strcmp(writemaps->tables[i].to_charset, "ASCII")&&
334 !strcmp(writemaps->tables[i].from_charset,
335 writemaps->charset_name))
336 {
337 *allowed = 0; /* don't write CHRS kludge if we translate
338 to ASCII anyway */
339 return writemaps->tables+i;
340 }
341 }
342 *allowed=0;
343 return NULL; /* can't help */
344 }
345
346
347
348 /* Test if we have a read table for this charset */
349
have_readtable(const char * charset_name,int level)350 int have_readtable (const char *charset_name, int level)
351 {
352 return get_readtable(charset_name, level) != &maskout_table;
353 }
354
355
356
/* This routine filters out control codes that could break vt100.

   On builds with UNIX or SASC defined, every byte below 32 other than
   '\n', '\r' and '\001', and every byte in the range 128..159, is
   overwritten in place with '?'.  On other builds the text is left
   untouched.  A NULL text is accepted and ignored. */

void strip_control_chars(char *text)
{
#if defined(UNIX) || defined(SASC)

    unsigned char *cursor;

    if (text == NULL)
    {
        return;
    }

    for (cursor = (unsigned char *) text; *cursor != '\0'; cursor++)
    {
        int harmless = (*cursor == '\n' || *cursor == '\r' ||
                        *cursor == '\001');

        if ((*cursor < 32 && !harmless) ||
            (*cursor >= 128 && *cursor < 160))
        {
            *cursor = '?';
        }
    }
#endif
}
381
translate_text(const char * text,LOOKUPTABLE * table)382 char *translate_text (const char *text, LOOKUPTABLE *table)
383 {
384 size_t orglength, maxlength;
385 size_t srcidx = 0, dstidx = 0;
386 char * translated;
387 unsigned char tblidx;
388
389 if (text == NULL)
390 {
391 return NULL;
392 }
393
394 translated = xmalloc((maxlength = orglength = strlen(text)) +1);
395 /* at first, we assume 1:1 translation */
396
397
398 if (table == NULL) /* no translation necessary or possible */
399 {
400 if (maxlength != 0)
401 {
402 memcpy (translated, text, maxlength);
403 }
404 translated[maxlength] = 0;
405 return translated;
406 }
407
408 for (srcidx=dstidx=0; srcidx < orglength; srcidx++)
409 {
410 if (dstidx>=maxlength)
411 {
412 translated=realloc(translated, (maxlength += 40) + 1);
413 }
414
415 switch (table->level)
416 {
417 case 1:
418 if (text[srcidx] & 0x80) /* can't help here */
419 {
420 translated[dstidx++] = text[srcidx];
421 continue;
422 }
423 tblidx=(unsigned char)text[srcidx] * 2;
424 break; /* case */
425 case 2:
426 if (!(text[srcidx] & 0x80)) /* nothing to do here */
427 {
428 translated[dstidx++] = text[srcidx];
429 continue;
430 }
431 tblidx=((unsigned char)text[srcidx]-128) * 2;
432 break; /* case */
433 default: /* other levels are not implemented */
434 translated[dstidx++] = text[srcidx];
435 continue; /* for */
436 }
437
438 if (table->lookuptable[tblidx] >=0 &&
439 table->lookuptable[tblidx] <=1)
440 {
441 if (table->lookuptable[tblidx+1])
442 {
443 translated[dstidx++] = table->lookuptable[tblidx+1];
444 }
445 continue;
446 }
447
448 if (! (table->lookuptable[tblidx] >=2 && /* not a reserved */
449 table->lookuptable[tblidx] <=32) ) /* character */
450 {
451 translated[dstidx++] = table->lookuptable[tblidx];
452 if (dstidx>=maxlength)
453 {
454 translated=realloc(translated, (maxlength += 40) + 1);
455 }
456 translated[dstidx++] = table->lookuptable[tblidx+1];
457 continue;
458 }
459 }
460
461 assert(dstidx <= maxlength);
462 translated[dstidx++]=0;
463
464 return translated;
465 }
466
467
/* Return the numeric part of a "CPnnn" charset name (after alias
   resolution) as converted by atoi(), or 0 if the name does not start
   with "CP". */

int get_codepage_number(const char *kludge_name)
{
    const char *resolved = findalias(kludge_name);

    if (resolved[0] != 'C' || resolved[1] != 'P')
    {
        return 0;
    }
    return atoi(resolved + 2);
}
477
get_local_charset(void)478 char *get_local_charset(void)
479 {
480 static char buffer[20];
481
482 if (readmaps == NULL)
483 {
484 return NULL;
485 }
486
487 sprintf (buffer, "%s 2", readmaps->charset_name);
488 return buffer;
489 }
490
/* qsort() comparator: orders two table entries as strings via stricmp(). */

static int ct_comparator(const void *p1, const void *p2)
{
    const char *lhs = p1;
    const char *rhs = p2;

    return stricmp(lhs, rhs);
}
495
496 /* This function gets a human readable list of character set for which we have
497 read maps available. It can be used by the calling program to display a list
498 of these character sets, e.g. when offering the user a possibility to
499 override a character set kludge in the mail and to manually select the read
500 map to use.
501
502 nelem and elem_size must not be NULL; they will be filled in with the number
503 of elements in the list and the size of each element (including a trailing
504 \0), respectively.
505
506 The pointer that is returned has to be free'ed by the program.
507 */
508
get_known_charset_table(int * nelem,int * elem_size)509 char *get_known_charset_table(int *nelem, int *elem_size)
510 {
511 char *array;
512 int i;
513
514 if (nelem == NULL || elem_size == NULL || readmaps == NULL ||
515 readmaps->tables == NULL || readmaps->n_tables <= 0)
516 {
517 return NULL;
518 }
519
520 *elem_size = 9 + 1 + 1; /* name, space, level */
521 *nelem = readmaps->n_tables;
522 array = malloc(((*nelem) + 1)* (*elem_size));
523
524 for (i = 0; i < (*nelem); i++)
525 {
526 sprintf(array + i * (*elem_size), "%s %d",
527 readmaps->tables[i].from_charset,
528 readmaps->tables[i].level);
529 }
530
531 sprintf (array + (*nelem) * (*elem_size), "%s 2",
532 readmaps->charset_name);
533 (*nelem)++;
534
535 qsort(array, *nelem, *elem_size, ct_comparator);
536
537 /* filter out duplicates */
538
539 for (i = 0; i < (*nelem) - 1; i++)
540 {
541 if (!stricmp(array + i * (*elem_size),
542 array + (i + 1) * (*elem_size)))
543 {
544 memmove(array + i * (*elem_size),
545 array + (i + 1) * (*elem_size),
546 ((*nelem) - i - 1) * (*elem_size));
547 (*nelem)--;
548 }
549 }
550
551 return array;
552 }
553
554
555
556