1 /*
2    language.c - Foreign language translation for PGP
3    Finds foreign language "subtitles" for English phrases
4    in external foreign language text file.
5 
6    (c) Copyright 1990-1996 by Philip Zimmermann.  All rights reserved.
7    The author assumes no liability for damages resulting from the use
8    of this software, even if the damage results from defects in this
9    software.  No warranty is expressed or implied.
10 
11    Note that while most PGP source modules bear Philip Zimmermann's
12    copyright notice, many of them have been revised or entirely written
13    by contributors who frequently failed to put their names in their
14    code.  Code that has been incorporated into PGP from other authors
15    was either originally published in the public domain or is used with
16    permission from the various authors.
17 
18    PGP is available for free to the public under certain restrictions.
19    See the PGP User's Guide (included in the release package) for
20    important information about licensing, patent restrictions on
21    certain algorithms, trademarks, copyrights, and export controls.
22  */
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <ctype.h>
28 #include "usuals.h"
29 #include "fileio.h"
30 #include "language.h"
31 #include "pgp.h"
32 #include "charset.h"
33 #include "armor.h"
34 
#define SUBTITLES_FILE	"language.txt"
#define LANG_INDEXFILE	"language.idx"

/* Size in bytes of every buffer handed to readstr().  readstr() never
 * stores more than STRBUFSIZE bytes (terminating NUL included). */
#define	STRBUFSIZE		2048

char language[16] = "en";	/* The language code, defaults to English */
static char *strbuf;		/* scratch buffer of STRBUFSIZE bytes,
				   allocated by init_lang() */
static char lang[16];		/* readstr sets this to the language id of
				   the msg it last read */
/*
 * subtitles_available records what we know about the subtitles file:
 *    0 = first time thru, we don't yet know if subtitles_file exists.
 *    1 = we have already determined that subtitles_file exists.
 *   -1 = we have already determined that subtitles_file does not exist
 *        (or cannot be used).
 */
static int subtitles_available = 0;
/* Current line number in language.txt, used in diagnostics.  readstr() only
 * counts lines while this is non-zero; 0 means "not counting". */
static int line = 0;

/* Parser states of readstr() */
#define	NEWLINE		0	/* looking for an identifier, comment or '"' */
#define	COMMENT		1	/* inside a '#' comment, until end of line */
#define	INSTRING	2	/* inside the quoted string */
#define	ESCAPE		3	/* just saw a backslash inside the string */
#define	IDENT		4	/* reading the language identifier */
#define	DONE		5	/* closing quote seen, expecting end of line */
#define	ERROR		6	/* skipping input after a syntax error */
#define	ERR1		7	/* in error recovery, previous char was '\n' */

/*
 * Append ch to the output at p unless p has reached limit (the last byte,
 * reserved for the NUL).  A dropped character sets *truncated.
 * Returns the new write position.
 */
static char *
 readstr_put(char *p, const char *limit, int ch, int *truncated)
{
    if (p < limit)
	*p++ = (char) ch;
    else
	*truncated = 1;
    return p;
}

/*
 * Look for and return a quoted string from the file.
 *
 * f        stream positioned inside language.txt
 * buf      output buffer; must hold at least STRBUFSIZE bytes
 * nlabort  if true, stop as soon as a newline is seen while still looking
 *          for the opening quote (normally the blank line that ends an
 *          entry) and return an empty string
 *
 * An optional "xx:" prefix before the string is stored in the file-scope
 * variable lang (empty if the string had no prefix).  Backslash escapes
 * \n \r \t \e \a \# \" \\ and octal \ooo are decoded, and a backslash
 * followed by a newline continues the string on the next line.
 *
 * Returns buf on success (possibly empty, see nlabort), or NULL when end
 * of file is reached before a complete string was read.  Strings longer
 * than STRBUFSIZE - 1 characters are truncated and reported on stderr.
 */
static char *
 readstr(FILE * f, char *buf, int nlabort)
{
    int c, d;
    char *p = buf;
    char *const limit = buf + STRBUFSIZE - 1;	/* keep room for the NUL */
    int truncated = 0;
    int complete = 0;		/* set once the line after '"' has ended */
    int state = NEWLINE;
    int i = 0;			/* number of characters stored in lang */

    while (!complete && (c = getc(f)) != EOF) {
	if (c == '\r')
	    continue;
	/* line numbers are only incremented when creating index file */
	if (line && c == '\n')
	    ++line;
	switch (state) {
	case NEWLINE:
	    switch (c) {
	    case '#':
		state = COMMENT;
		break;
	    case '"':
		state = INSTRING;
		break;
	    case '\n':
		if (nlabort) {
		    *buf = '\0';
		    return buf;
		}
		/* FALLTHROUGH: otherwise a newline is just white space */
	    default:
		/* an identifier may only start once per string */
		if (i == 0 && isalnum(c)) {
		    state = IDENT;
		    lang[i++] = c;
		    break;
		}
		if (!isspace(c)) {
		    fprintf(stderr, "language.txt:%d: syntax error\n", line);
		    state = ERROR;
		}
	    }
	    break;
	case COMMENT:
	    if (c == '\n')
		state = NEWLINE;
	    break;
	case INSTRING:
	    switch (c) {
	    case '\\':
		state = ESCAPE;
		break;
	    case '"':
		state = DONE;
		break;
	    default:
		p = readstr_put(p, limit, c, &truncated);
	    }
	    break;
	case ESCAPE:
	    switch (c) {
	    case 'n':
		p = readstr_put(p, limit, '\n', &truncated);
		break;
	    case 'r':
		p = readstr_put(p, limit, '\r', &truncated);
		break;
	    case 't':
		p = readstr_put(p, limit, '\t', &truncated);
		break;
	    case 'e':
#ifdef EBCDIC
		p = readstr_put(p, limit, ESC, &truncated);
#else
		p = readstr_put(p, limit, '\033', &truncated);
#endif
		break;
	    case 'a':
#ifdef EBCDIC
		p = readstr_put(p, limit, '\a', &truncated);
#else
		p = readstr_put(p, limit, '\007', &truncated);
#endif
		break;
	    case '#':
	    case '"':
	    case '\\':
		p = readstr_put(p, limit, c, &truncated);
		break;
	    case '\n':		/* backslash-newline: line continuation */
		break;
	    case '0':
	    case '1':
	    case '2':
	    case '3':
	    case '4':
	    case '5':
	    case '6':
	    case '7':
		d = c - '0';
		/* Only the low 8 bits end up in the string, so keep just
		 * those; this stops a long digit run from overflowing d. */
		while ((c = fgetc(f)) >= '0' && c <= '7')
		    d = (8 * d + (c - '0')) & 0xFF;
#ifdef EBCDIC
/* dirty hack for \007 chars in LANG:    LANG("\n\007....")
   The right way is to replace all \007 by \a in the args of LANG() */
		if (d == 7)
		    d = '\a';
#endif
		p = readstr_put(p, limit, d, &truncated);
		/* give back the character that ended the digits; it is
		 * re-read (and line-counted) by the main loop */
		ungetc(c, f);
		break;
	    default:
		fprintf(stderr,
			"language.txt:%d: illegal escape sequence: '\\%c'\n",
			line, c);
		break;
	    }
	    state = INSTRING;
	    break;
	case IDENT:		/* language identifier */
	    if (c == ':') {
		state = NEWLINE;
		break;
	    }
	    /* A line consisting of exactly "No translation" is skipped.
	     * Checking i ensures only characters read for this identifier
	     * are compared, not leftovers of an earlier one. */
	    if (c == '\n' && i == 14 &&
		strncmp(lang, "No translation", 14) == 0) {
		i = 0;
		state = NEWLINE;
		break;
	    }
	    lang[i++] = c;
	    if (i == (int) sizeof(lang) - 1 ||
		(!isalnum(c) && !isspace(c))) {
		lang[i] = '\0';
		fprintf(stderr,
			"language.txt:%d: bad language identifier: '%s'\n",
			line, lang);
		state = ERROR;
		i = 0;
	    }
	    break;
	case DONE:
	    if (c == '\n') {
		complete = 1;
		break;
	    }
	    if (!isspace(c)) {
		fprintf(stderr,
			"language.txt:%d: extra characters after '\"'\n",
			line);
		state = ERROR;
	    }
	    break;
	case ERROR:
	    if (c == '\n')
		state = ERR1;
	    break;
	case ERR1:
	    if (c == '\n') {
		/* A blank line ends error recovery.  Start from scratch so
		 * the next string is not appended to the broken one and a
		 * new language identifier can be recognised. */
		state = NEWLINE;
		p = buf;
		i = 0;
		truncated = 0;
	    } else {
		state = ERROR;
	    }
	    break;
	}
    }

    /* A string whose closing quote is the last thing in the file is
     * complete even though no newline follows it. */
    if (complete || state == DONE) {
	lang[i] = '\0';
	*p = '\0';
	if (truncated)
	    fprintf(stderr,
		    "language.txt:%d: string too long, truncated to %d characters\n",
		    line, STRBUFSIZE - 1);
	return buf;
    }
    if (state != NEWLINE)
	fprintf(stderr, "language.txt: unexpected EOF\n");
    return NULL;
}
227 
228 #ifdef TEST
229 main()
230 {
231     char buf[2048];
232 
233     line = 1;
234     while (readstr(stdin, buf, 0)) {
235 	printf("\nen: <%s>\n", buf);
236 	while (readstr(stdin, buf, 1) && *buf != '\0')
237 	    printf("%s: <%s>\n", lang, buf);
238     }
239     exit(0);
240 }
241 #else
242 
/* One index entry: ties the CRC of an English message to the place in
 * language.txt where its translation can be read. */
static struct indx_ent {
    word32 crc;			/* message_crc() of the English message */
    long offset;		/* position in langf to seek to before
				   reading the translation */
} *indx_tbl = NULL;		/* the index table, NULL until loaded/built */

static int max_msgs = 0;	/* entries allocated for indx_tbl while
				   make_indexfile() builds it */
static long nmsg = 0;		/* entries of indx_tbl that are in use */

static FILE *langf;		/* language.txt, opened by init_lang() */

static void init_lang(void);

static int make_indexfile(char *);
256 
257 /*
258  * uses 24-bit CRC function from armor.c
259  */
260 static word32
261  message_crc(char *s)
262 {
263     return crcbytes((byte *) s, strlen(s), (word32) 0);
264 }
265 
266 /*
267  * lookup file offset in indx_tbl
268  */
269 static long lookup_offset(word32 crc)
270 {
271     int i;
272 
273     for (i = 0; i < nmsg; ++i)
274 	if (indx_tbl[i].crc == crc)
275 	    return indx_tbl[i].offset;
276     return -1;
277 }
278 
279 
280 /*
281  * return foreign translation of s
282  */
283 char *
284  LANG(char *s)
285 {
286     long filepos;
287 #ifdef MACTC5
288 	extern Boolean contains_yesNo, contains_enough, contains_badpass;
289 	contains_yesNo = (((void *) strstr(s, "(Y/n)?") != NULL) ||
290 	    ((void *) strstr(s, "(y/N)?") != NULL));
291 	contains_enough = ((void *) strstr(s, "Enough") != NULL);
292 	contains_badpass = (strstr(s, "Bad pass phrase.") != NULL);
293 #endif /* MACTC5 */
294     if (subtitles_available == 0)
295 	init_lang();
296     if (subtitles_available < 0)
297 	return s;
298 
299     filepos = lookup_offset(message_crc(s));
300     if (filepos == -1) {
301 	return s;
302     } else {
303 	fseek(langf, filepos, SEEK_SET);
304 	readstr(langf, strbuf, 1);
305     }
306 
307     if (strbuf[0] == '\0')
308 	return s;
309 
310 #ifndef EBCDIC /* no conversion for ebcdic printf() messages needed */
311     for (s = strbuf; *s; ++s)
312 	*s = EXT_C(*s);
313 #endif
314     return strbuf;
315 }
316 
317 
/* Header record written at the start of language.idx, before the index
 * entries.  init_lang() compares it with the current language.txt and the
 * configured language to decide whether the stored index can be reused. */
static struct {
    long lang_fsize;		/* size of language.txt when indexed */
    char lang[16];		/* language identifier the index is for */
    long nmsg;			/* number of index entries that follow */
} indx_hdr;
323 
324 
325 /*
326  * initialize the index table: read it from language.idx or create
327  * a new one and write it to the index file. A new index file is
328  * created if the language set in config.pgp doesn't match the one
329  * in language.idx or if the size of language.txt has changed.
330  */
331 static void init_lang()
332 {
333     char indexfile[MAX_PATH];
334     char subtitles_file[MAX_PATH];
335     FILE *indexf;
336 #ifdef PGP_SYSTEM_DIR
337     int use_system_wide_lang = 0;
338 #endif
339 
340     if (strcmp(language, "en") == 0) {
341 	subtitles_available = -1;
342 	return;			/* use default messages */
343     }
344     buildfilename(subtitles_file, SUBTITLES_FILE);
345     langf = fopen(subtitles_file, FOPRBIN); /* Open file in binary mode... */
346     if (langf == NULL) {
347 #ifdef PGP_SYSTEM_DIR
348 	strcpy(subtitles_file, PGP_SYSTEM_DIR);
349 	strcat(subtitles_file, SUBTITLES_FILE);
350 	langf = fopen(subtitles_file, FOPRBIN); /* Open file in binary mode... */
351 	use_system_wide_lang = 1;
352 	if (langf == NULL)
353 #endif
354 	{
355 	    subtitles_available = -1;
356 	    return;
357 	}
358     }
359     init_crc();
360     strbuf = (char *) malloc(STRBUFSIZE);
361     if (strbuf == NULL) {
362 	fprintf(stderr, "Not enough memory for foreign subtitles\n");
363 	fclose(langf);
364 	subtitles_available = -1;
365 	return;
366     }
367 #ifdef PGP_SYSTEM_DIR
368     if (use_system_wide_lang) {
369 	strcpy(indexfile, PGP_SYSTEM_DIR);
370 	strcat(indexfile, LANG_INDEXFILE);
371     } else
372 #endif
373     buildfilename(indexfile, LANG_INDEXFILE);
374     indexf = fopen(indexfile, FOPRBIN);
375     if (indexf != NULL) {
376 	if (fread(&indx_hdr, 1, sizeof(indx_hdr),
377 		  indexf) == sizeof(indx_hdr) &&
378 	    indx_hdr.lang_fsize == fsize(langf) &&
379 	    strcmp(indx_hdr.lang, language) == 0) {
380 	    nmsg = indx_hdr.nmsg;
381 	    indx_tbl = (struct indx_ent *) malloc(nmsg *
382 						  sizeof(struct indx_ent));
383 	    if (indx_tbl == NULL) {
384 		fprintf(stderr, "Not enough memory for foreign subtitles\n");
385 		fclose(indexf);
386 		fclose(langf);
387 		subtitles_available = -1;
388 		return;
389 	    }
390 	    if (fread(indx_tbl,
391 		      sizeof(struct indx_ent), nmsg, indexf) != nmsg) {
392 		free(indx_tbl);	/* create a new one */
393 		indx_tbl = NULL;
394 	    }
395 	}
396 	fclose(indexf);
397     }
398     if (indx_tbl == NULL && make_indexfile(indexfile) < 0) {
399 	fclose(langf);
400 	subtitles_available = -1;
401     } else {
402 	subtitles_available = 1;
403     }
404 }
405 
406 
407 static int make_indexfile(char *indexfile)
408 {
409     FILE *indexf;
410     long filepos;
411     int total_msgs = 0;
412     char *res;
413 
414     if (verbose)		/* must be set in config.pgp */
415 	fprintf(stderr,
416 		"Creating language index file '%s' for language \"%s\"\n",
417 		indexfile, language);
418     rewind(langf);
419     indx_hdr.lang_fsize = fsize(langf);
420     strncpy(indx_hdr.lang, language, 15);
421     init_crc();
422     line = 1;
423     nmsg = 0;
424     while (readstr(langf, strbuf, 0)) {
425 	if (nmsg == max_msgs) {
426 	    if (max_msgs) {
427 		max_msgs *= 2;
428 		indx_tbl = (struct indx_ent *) realloc(indx_tbl, max_msgs *
429 						sizeof(struct indx_ent));
430 	    } else {
431 		max_msgs = 400;
432 		indx_tbl = (struct indx_ent *) malloc(max_msgs *
433 						sizeof(struct indx_ent));
434 	    }
435 	    if (indx_tbl == NULL) {
436 		fprintf(stderr, "Not enough memory for foreign subtitles\n");
437 		return -1;
438 	    }
439 	}
440 	++total_msgs;
441 	indx_tbl[nmsg].crc = message_crc(strbuf);
442 	if (lookup_offset(indx_tbl[nmsg].crc) != -1)
443 	    fprintf(stderr,
444 		    "language.txt:%d: Message CRC not unique: \"%s\"\n",
445 		    line, strbuf);
446 	do {
447 	    filepos = ftell(langf);
448 	    res = readstr(langf, strbuf, 1);	/* Abort if find newline
449 						   first */
450 	} while (res && strbuf[0] != '\0' && strcmp(language, lang) != 0);
451 
452 	if (res == NULL)
453 	    break;
454 	if (strbuf[0] == '\0')	/* No translation */
455 	    continue;
456 
457 	indx_tbl[nmsg].offset = filepos;
458 	++nmsg;
459 	do
460 	    res = readstr(langf, strbuf, 1);	/* Abort if find newline
461 						   first */
462 	while (res && strbuf[0] != '\0');
463     }
464     line = 0;
465     indx_hdr.nmsg = nmsg;
466     if (nmsg == 0) {
467 	fprintf(stderr, "No translations available for language \"%s\"\n\n",
468 		language);
469 	return -1;
470     }
471     if (verbose || total_msgs != nmsg)
472 	fprintf(stderr, "%d messages, %d translations\n\n", total_msgs, nmsg);
473 
474     if ((indexf = fopen(indexfile, FOPWBIN)) == NULL) {
475 	fprintf(stderr, "Cannot create %s\n", indexfile);
476     } else {
477 	fwrite(&indx_hdr, 1, sizeof(indx_hdr), indexf);
478 	fwrite(indx_tbl, sizeof(struct indx_ent), nmsg, indexf);
479 	if (ferror(indexf) || fclose(indexf))
480 	    fprintf(stderr, "error writing %s\n", indexfile);
481     }
482     return 0;
483 }
484 #endif				/* TEST */
485