1 /*
2  *  MAKEMAPS.C
3  *
4  *  Written 1998 by Tobias Ernst. Released to the Public Domain.
5  *
6  *  This program generates the READMAPS.DAT and WRITMAPS.DAT files required
7  *  by MsgEd to do proper FSC-0054 character set translation.
8  *
9  *  Warning: This is spaghetti code. You normally don't have to use the program
10  *  anyway (I do supply ready-made binary map files), and you surely don't have
11  *  to mess around with this source code. :-)
12  */
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <ctype.h>
17 #include <string.h>
18 
19 extern int unlink(const char *);
20 
/*
 * One character translation table as read from a .chs source file.
 *
 */

typedef struct _lookuptable
{
    /* first numeric header value of the .chs file */
    unsigned long  id;
    /* second numeric header value of the .chs file */
    unsigned short modrev;
    /* third numeric header value; process() only accepts 1 or 2 */
    unsigned short level;
    /* source charset name: at most 8 characters plus the terminating NUL */
    char           from_charset[9];
    /* target charset name: at most 8 characters plus the terminating NUL */
    char           to_charset[9];
    /* 128 entries of two bytes each, in the order they appear in the file */
    char           lookuptable[256];
} LOOKUPTABLE;
30 
31 typedef struct _readwritemap
32 {
33     char           charset_name[9];
34     int            n_tables;
35     LOOKUPTABLE   *tables;
36 } READWRITEMAPS;
37 
38 
39 /* global variables */
40 
41 READWRITEMAPS readmaps, writmaps;
42 
43 
/*
 * Converts a NUL-terminated string to upper case, in place.
 *
 * p:       the string to convert; its contents are overwritten.
 * returns: p itself, so that the call can be used inside an expression.
 *
 */

char *upcase(char *p)
{
    char *o = p;

    for (; *p; p++)
    {
        /* toupper() is only defined for EOF and for values representable
           as unsigned char. A plain char may be negative for 8-bit
           characters, so it has to be converted before the call. */
        *p = (char)toupper((unsigned char)*p);
    }
    return o;
}
58 
59 
60 /*
61  * malloc and realloc routines with built-in check for memory exhaustion.
62  *
63  */
64 
/*
 * Allocates size bytes like malloc(), but never returns NULL: if the
 * memory cannot be obtained, "OUT OF MEMORY" is printed to stderr and the
 * program is aborted. The result must be released with free().
 *
 */

void *xmalloc(size_t size)
{
    /* malloc(0) is allowed to return NULL without being out of memory.
       Request at least one byte so that an empty allocation is never
       mistaken for memory exhaustion. */
    void *ptr = malloc(size != 0 ? size : 1);

    if (ptr == NULL)
    {
        fprintf (stderr, "OUT OF MEMORY\n");
        abort();
    }
    return ptr;
}
75 
/*
 * Returns a freshly allocated copy of the NUL-terminated string. Never
 * returns NULL: on memory exhaustion "OUT OF MEMORY" is printed to stderr
 * and the program is aborted. The copy must be released with free().
 *
 */

char *xstrdup(const char *string)
{
    /* strdup() is not part of ISO C before C23 and is therefore not
       declared by <string.h> on a strictly conforming compiler, so the
       copy is made with standard functions only. */
    size_t size = strlen(string) + 1;   /* including the terminating NUL */
    char *ptr = malloc(size);

    if (ptr == NULL)
    {
        fprintf (stderr, "OUT OF MEMORY\n");
        abort();
    }
    memcpy(ptr, string, size);
    return ptr;
}
86 
/*
 * Resizes a memory block like realloc(), but never returns NULL: if the
 * memory cannot be obtained, "OUT OF MEMORY" is printed to stderr and the
 * program is aborted. Passing NULL as oldptr allocates a new block.
 *
 */

void *xrealloc(void *oldptr, size_t size)
{
    /* A zero-size realloc() may free the block and return NULL, which
       would be misreported as memory exhaustion. Request at least one
       byte so that the result is always a valid block. */
    void *ptr = realloc(oldptr, size != 0 ? size : 1);

    if (ptr == NULL)
    {
        fprintf (stderr, "OUT OF MEMORY\n");
        abort();
    }
    return ptr;
}
97 
98 
99 /*
100  * initialise the global variables
101  *
102  */
103 
init(char * charset_name)104 void init(char *charset_name)
105 {
106     size_t len = strlen(charset_name);
107 
108     if (len>8)
109     {
110         len = 8;
111     }
112 
113     memset(&readmaps, '\0', sizeof(readmaps));
114     memset(&writmaps, '\0', sizeof(writmaps));
115 
116     memmove(readmaps.charset_name, charset_name, len);
117     memmove(writmaps.charset_name, charset_name, len);
118     upcase(readmaps.charset_name);
119     upcase(writmaps.charset_name);
120 }
121 
122 
/*
 * Prints a short help text to stdout.
 *
 */

void usage(void)
{
    static const char *const help[] =
    {
        "Usage:\n\n",
        "        MAKEMAPS <charset-name> <chs-file> ...\n\n",
        "Where:\n\n",
        "        <charset-name> is the name of the charset this\n",
        "                       mapping file is for.\n",
        "        <chs-file>     is the name of a source file in\n",
        "                       .chs format defining a translation\n",
        "                       table. This parameter can and\n",
        "                       should be repeated.\n\n",
        "Example:\n\n",
        "         MAKEMAPS IBMPC IBM_ISO.CHS ISO_IBM.CHS IBM_ASC.CHS ASC_IBM.CHS\n\n",
        "On a UNIX-like shell with shell expansions, you can simply use:\n\n",
        "         ./makemaps LATIN-1 *.CHS *.chs\n"
    };
    size_t line;

    for (line = 0; line < sizeof(help) / sizeof(help[0]); line++)
    {
        fputs(help[line], stdout);
    }
}
144 
145 
/*
 * Extracts the first and second whitespace-separated word from a line.
 *
 * string: the line to parse; it is not modified.
 * word1:  receives a newly allocated copy of the first word, or NULL if
 *         the line contains no word or starts with a ';' (comment line).
 * word2:  receives a newly allocated copy of the second word, or NULL if
 *         there is none.
 *
 * The caller owns the returned strings and has to free() them.
 *
 */

void parse_line (char *string, char **word1, char **word2)
{
    int word = 0;       /* number of words found so far (0, 1 or 2) */
    int noword = 1;     /* nonzero while the scan is between two words */
    char *dup;
    char *cp;

    *word1 = NULL;
    *word2 = NULL;

    /* Comment lines are rejected before the working copy is made, so
       that nothing is allocated that would have to be released here. */
    if (*string == ';')
    {
        return;
    }

    /* The words are terminated by writing NULs into a private copy. */
    dup = xstrdup(string);

    for (cp = dup; *cp; cp++)
    {
        /* the <ctype.h> functions need an unsigned char value */
        if (isspace((unsigned char)*cp))
        {
            *cp = '\0';
            noword = 1;
            if (word == 2)
            {
                /* the second word is complete, the rest is ignored */
                break;
            }
        }
        else if (noword)
        {
            noword = 0;
            if (!word)
            {
                *word1 = cp;
                word = 1;
            }
            else
            {
                *word2 = cp;
                word = 2;
            }
        }
    }

    /* Hand out independent copies, because dup is released below. */
    if (*word1 != NULL)
    {
        *word1 = xstrdup(*word1);
    }
    if (*word2 != NULL)
    {
        *word2 = xstrdup(*word2);
    }
    free(dup);
}
205 
206 
/*
 * parse_char parses a "character description" and returns the corresponding
 * unsigned character. A character description must be either
 *
 *     - a single character or
 *     - an escape sequence introduced with a backslash. The following escape
 *        sequences are supported:
 *        \\    -> a single backslash
 *        \0    -> the NUL character
 *        \dnnn -> decimal notation of an ascii code
 *        \xnn  -> hexadecimal notation of an ascii code
 *
 * examples:
 *
 *     0
 *     \d48
 *     \x30
 *
 * all refer to the same character.
 *
 * If parse_char returns 256, it means that an error occurred; a message
 * has then been printed to stdout. This is the case for an unknown escape
 * sequence, for a \d or \x escape without digits or with an invalid digit,
 * and for a value greater than 255. The filename parameter is passed to
 * parse_char for error logging purposes only.
 *
 */

unsigned int parse_char(char *cp, char *filename)
{
    unsigned char *ucp = (unsigned char *)cp;
    unsigned int rv = 0;
    unsigned int base;
    unsigned int digit;

    if (*ucp != '\\')
    {
        /* not an escape sequence: the first character stands for itself */
        return *ucp;
    }

    switch (*(++ucp))
    {
    case 'd':
        base = 10;
        break;
    case 'x':
        base = 16;
        break;
    case '\\':
        return '\\';
    case '0':
        return 0;
    default:
        goto bad;
    }

    ++ucp;
    if (*ucp == '\0')
    {
        /* "\d" or "\x" without any digit is not a character code */
        goto bad;
    }

    for (; *ucp; ++ucp)
    {
        if (isdigit(*ucp))
        {
            digit = *ucp - '0';
        }
        else if (base == 16 && isxdigit(*ucp))
        {
            digit = toupper(*ucp) - 'A' + 10;
        }
        else
        {
            goto bad;
        }
        rv = rv * base + digit;

        /* Checking after every digit keeps rv small, so a very long
           number cannot wrap around to a value that looks valid. */
        if (rv >= 256)
        {
            goto bad;
        }
    }
    return rv;

bad:
    printf ("%s: Error: Cannot parse character \"%s\".\n",
            filename, cp);
    return 256;
}
298 
299 
300 /*
301  * The process() function reads in a .CHS file, parses it, and creates a
302  * corresponding LOOKUPTABLE entry in the tables array of either the readmaps
303  * or the writmaps variable.
304  *
305  */
306 
process(char * filename)307 int process(char *filename)
308 {
309     FILE *f = fopen(filename, "r");
310     int pos;
311     char buf[256];
312     char *word1, *word2;
313     LOOKUPTABLE ltable;
314     READWRITEMAPS *maps = NULL;
315     unsigned int tmp;
316 
317     if (f == NULL)
318     {
319         printf ("%-15s: Not found.\n", filename);
320         return 0;
321     }
322 
323     pos = 0;
324 
325     while ((!feof(f)) && (pos <= 133))
326     {
327         if (fgets(buf, sizeof(buf) - 1, f) != NULL)
328         {
329             parse_line(buf, &word1, &word2);
330             if (word1 == NULL)
331             {
332                 continue;
333             }
334             switch (pos)
335             {
336             case 0:
337                 ltable.id = atoi(word1);
338                 break;
339             case 1:
340                 ltable.modrev = atoi(word1);
341                 break;
342             case 2:
343                 ltable.level = atoi(word1);
344                 if (ltable.level < 1 || ltable.level > 2)
345                 {
346                     printf ("%s: ERROR: Charset level not 1 or 2.\n", filename);
347                     fclose(f);
348                     return 0;
349                 }
350                 break;
351             case 3:
352                 if (strlen(word1) > 8)
353                 {
354                     printf ("%s: ERROR: Charset name longer "
355                             "than 8 characters.\n", filename);
356                     fclose(f);
357                     return 0;
358                 }
359                 strcpy(ltable.from_charset, word1);
360                 upcase(ltable.from_charset);
361                 break;
362             case 4:
363                 if (strlen(word1) > 8)
364                 {
365                     printf ("%s: ERROR: Charset name longer "
366                             "than 8 characters.\n", filename);
367                     fclose(f);
368                     return 0;
369                 }
370                 strcpy(ltable.to_charset, word1);
371                 upcase(ltable.to_charset);
372                 if (!strcmp(ltable.from_charset, writmaps.charset_name))
373                 {
374                     maps = &writmaps;
375                     printf ("%s: Using for WRITMAPS.DAT.\n", filename);
376                 }
377 		else
378                 if ((!strcmp(ltable.to_charset, readmaps.charset_name)) ||
379                    (!strcmp(ltable.to_charset, "ASCII")))
380                 {
381                     maps = &readmaps; /* read */
382                     printf ("%s: Using for READMAPS.DAT.\n", filename);
383                 }
384                 else
385                 {
386                     printf ("%s: Skipping (no matching charset).\n", filename);
387                     fclose(f);
388                     return 1;
389                 }
390                 break;
391             case 133:
392                 if (strcmp(upcase(word1), "END"))
393                 {
394                     printf ("%s: ERROR: Expected END statement not found.\n",
395                             filename);
396                     fclose(f);
397                     return 0;
398                 }
399                 break;
400             default:
401                 if ((tmp = parse_char(word1, filename)) >= 256)
402                 {
403                     fclose(f);
404                     return 0;
405                 }
406                 if (word2 == NULL)
407                 {
408                     printf ("%s: ERROR: Syntax error.\n", filename);
409                     fclose(f);
410                     return(0);
411                 }
412                 ltable.lookuptable[2*(pos-5)] = tmp;
413                 if ((tmp = parse_char(word2, filename)) >= 256)
414                 {
415                     fclose(f);
416                     return 0;
417                 }
418                 ltable.lookuptable[(2*(pos-5)) + 1] = tmp;
419                 break;
420             }
421             pos++;
422             free(word1);
423             if (word2 != NULL)
424             {
425                 free(word2);
426             }
427         }
428     }
429     if (maps->tables == NULL)
430     {
431         maps->tables = malloc(sizeof(LOOKUPTABLE) * (maps->n_tables + 1));
432     }
433     else
434     {
435         maps->tables = realloc(maps->tables,
436                                 sizeof(LOOKUPTABLE) * (maps->n_tables + 1));
437     }
438     memmove(maps->tables + maps->n_tables, &ltable, sizeof(LOOKUPTABLE));
439     maps->n_tables++;
440     fclose(f);
441     return 1;
442 }
443 
444 
445 /*
446  * save_table writes a given lookup table to disk.
447  *
448  */
449 
save_table(FILE * f,LOOKUPTABLE * pltable)450 int save_table(FILE *f, LOOKUPTABLE *pltable)
451 {
452     unsigned char raw[288];
453     unsigned int i;
454 
455     memset(raw, '\0', sizeof(raw));
456     raw[4] = 1; /* module revision */
457     raw[6] = pltable->level & 0xFF;
458     raw[7] = (pltable->level >> 8) & 0xFF;
459     memmove(raw + 16, pltable->from_charset, strlen(pltable->from_charset));
460     memmove(raw + 24, pltable->to_charset, strlen(pltable->to_charset));
461 
462     for (i=0; i<256; i++)
463     {
464         raw[i+32] = pltable->lookuptable[i];
465     }
466 
467     return (fwrite(raw, sizeof(raw), 1, f) == 1);
468 }
469 
470 
471 /*
472  * sort_read is a qsort helper function for sorting the readmaps,
473  * sort_write is a qsort helper function for sorting the writmaps.
474  *
475  */
476 
sort_read(const void * p1,const void * p2)477 int sort_read(const void *p1, const void *p2)
478 {
479     const LOOKUPTABLE *pl1 = p1, *pl2 = p2;
480     int i;
481     if (pl1->level < pl2->level)
482     {
483         return -1;
484     }
485     if (pl1->level > pl2->level)
486     {
487         return 1;
488     }
489     if ((i = strcmp(pl1->from_charset, pl2->from_charset)) != 0)
490     {
491         return i;
492     }
493     if (!strcmp(pl1->from_charset, "ASCII"))
494     {
495         return 1;
496     }
497     else if (!strcmp(pl2->from_charset, "ASCII"))
498     {
499         return -1;
500     }
501     else
502     {
503         return 0;
504     }
505 }
506 
sort_writ(const void * p1,const void * p2)507 int sort_writ(const void *p1, const void *p2)
508 {
509     const LOOKUPTABLE *pl1 = p1, *pl2 = p2;
510 
511     if (pl1->level < pl2->level)
512     {
513         return -1;
514     }
515     if (pl1->level > pl2->level)
516     {
517         return 1;
518     }
519     return (strcmp(pl1->to_charset, pl2->to_charset));
520 }
521 
522 
523 /*
524  * save writes the whole thing to disk
525  *
526  */
527 
save(void)528 int save(void)
529 {
530     FILE *fr = fopen("readmaps.dat","wb");
531     FILE *fw = fopen("writmaps.dat","wb");
532     unsigned char header[12];
533     int i;
534 
535     if (fr == NULL)
536     {
537         fprintf (stderr, "Cannot write readmaps.dat.\n");
538         goto erro;
539     }
540     if (fw == NULL)
541     {
542         fprintf (stderr, "Cannot write writmaps.dat.\n");
543         goto erro;
544     }
545 
546     memset(header, '\0', sizeof(header));
547     header[0] = 1;  /* We always write a little endian style file. MsgEd TE
548                        will be able to read this even on a big endian machine,
549                        and we are able to create it even on a big endian
550                        machine, so there is no problem. */
551     memmove(header+4, readmaps.charset_name, strlen(readmaps.charset_name));
552     if (fwrite(header, sizeof(header), 1, fr) != 1)
553     {
554         goto erro;
555     }
556     memset(header + 4, '\0', sizeof(header) - 4);
557     memmove(header+4, writmaps.charset_name, strlen(writmaps.charset_name));
558     if (fwrite(header, sizeof(header), 1, fw) != 1)
559     {
560         goto erro;
561     }
562 
563     qsort(readmaps.tables, readmaps.n_tables, sizeof(LOOKUPTABLE), sort_read);
564     for (i=0; i<readmaps.n_tables; i++)
565     {
566         if (!save_table(fr, readmaps.tables + i))
567         {
568             goto erro;
569         }
570     }
571 
572     qsort(writmaps.tables, writmaps.n_tables, sizeof(LOOKUPTABLE), sort_writ);
573     for (i=0; i<writmaps.n_tables; i++)
574     {
575         if (!save_table(fw, writmaps.tables + i))
576         {
577             goto erro;
578         }
579     }
580 
581     fclose(fw);
582     fclose(fr);
583     return 1;
584 
585 erro:
586     fprintf (stderr, "File I/O error.\n");
587     if (fw == NULL || fr == NULL)
588     {
589         if (fw != NULL)
590         {
591             fclose(fw);
592         }
593         if (fr != NULL)
594         {
595             fclose(fr);
596         }
597     }
598     unlink("readmaps.dat");
599     unlink("writmaps.dat");
600     return 0;
601 }
602 
main(int argc,char ** argv)603 int main(int argc, char **argv)
604 {
605     int i;
606 
607     if (argc < 3)
608     {
609         usage(); return 8;
610     }
611     if (strlen(argv[1]) > 8)
612     {
613         fprintf (stderr,
614                  "Error: Character set name is longer than 8 characters.\n");
615         return 8;
616     }
617 
618     init(argv[1]);
619     for (i = 2; i < argc; i++)
620     {
621         if (!process(argv[i]))
622         {
623             return 8;
624         }
625     }
626 
627     i = save();
628 
629     if (readmaps.tables != NULL)
630     {
631         free(readmaps.tables);
632     }
633     if (writmaps.tables != NULL)
634     {
635         free(writmaps.tables);
636     }
637     return (!i) ? 8 : 0;
638 }
639 
640 
641 
642 
643