1 /*
2  *	language.c - Foreign language translation for PGP
3  *	Finds foreign language "subtitles" for English phrases
 *	in external foreign language text file.
5  */
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include "usuals.h"
12 #ifndef LANGTOOL
13 #include "fileio.h"
14 #include "language.h"
15 #include "pgp.h"
16 #else
17 #define MAX_PATH	255
18 boolean verbose;
19 long fsize();
20 #endif
21 
/* Name of the language text file; also printed as a prefix by error(). */
char langfile[80] = "language.txt";
/* Name of the index file that caches message CRCs and file offsets. */
#define LANG_INDEXFILE	"language.idx"

/* Size in bytes of the buffer allocated for strbuf. */
#define	STRBUFSIZE		2048

char language[16] = "en";	/* The language code, defaults to English */
static char	*strbuf;	/* heap buffer (STRBUFSIZE bytes) that readstr decodes into */
static char	lang[16];	/* readstr sets this to the language id of the msg it last read */
static int	subtitles_available = 0;	/* tri-state flag, see below */
static int line = 0;		/* current line number; non-zero only while make_indexfile runs */
static int errcount = 0;	/* number of calls to error() */
/*	subtitles_available records whether translations can be used.
	It has the following values:
	0  = first time thru, init_lang has not been called yet.
	1  = the language file was opened and an index is loaded.
	-1 = translations are not available (language is "en", the
	     language file could not be opened, or setup failed).
*/
39 
40 static void error(char *);
41 
/* States of the readstr() parser. */
#define	NEWLINE		0	/* initial state: before an identifier, comment or opening quote */
#define	COMMENT		1	/* inside a '#' comment, skipping to end of line */
#define	INSTRING	2	/* inside a quoted string */
#define	ESCAPE		3	/* just read a backslash inside a string */
#define	IDENT		4	/* reading a language identifier, up to the ':' */
#define	DONE		5	/* closing quote seen, waiting for end of line */
#define	ERROR		6	/* error recovery: skipping to the next newline */
#define	ERR1		7	/* error recovery: one newline seen; a second one resumes parsing */
50 
51 /* Look for and return a quoted string from the file.
52  * If nlabort is true, return failure if we find a blank line
53  * before we find the opening quote.
54  */
55 static char	*
56 readstr (FILE *f, char *buf, int nlabort)
57 {
58 	int	c, d;
59 	char *p = buf;
60 	int state = NEWLINE;
61 	int i = 0;
62 
63 	while ((c = getc(f)) != EOF) {
64 		if (c == '\r')
65 			continue;
66 		/* line numbers are only incremented when creating index file */
67 		if (line && c == '\n')
68 			++line;
69 		switch (state) {
70 		  case NEWLINE:
71 			switch(c) {
72 			  case '#': state = COMMENT; break;
73 			  case '"': state = INSTRING; break;
74 			  case '\n':
75 				if (nlabort) {
76 					*buf = '\0';
77 					return(buf);
78 				}
79 			  default:
80 				if (i == 0 && isalnum(c)) {
81 					state = IDENT;
82 					lang[i++] = c;
83 					break;
84 				}
85 				if (!isspace(c)) {
86 					error("syntax error\n");
87 					state = ERROR;
88 				}
89 			}
90 			break;
91 		  case COMMENT:
92 			if (c == '\n')
93 				state = NEWLINE;
94 			break;
95 		  case INSTRING:
96 			switch(c) {
97 			  case '\\': state = ESCAPE; break;
98 			  case '"': state = DONE; break;
99 			  default: *p++ = c;
100 			}
101 			break;
102 		case ESCAPE:
103 			switch (c) {
104 			  case 'n':	*p++ = '\n';	break;
105 			  case 'r':	*p++ = '\r';	break;
106 			  case 't':	*p++ = '\t';	break;
107 			  case 'e':	*p++ = '\033';	break;
108 			  case 'a':	*p++ = '\007';	break;
109 			  case '#':
110 			  case '"':
111 			  case '\\':	*p++ = c; break;
112 			  case '\n':	break;
113 			  case '0':
114 			  case '1':
115 			  case '2':
116 			  case '3':
117 			  case '4':
118 			  case '5':
119 			  case '6':
120 			  case '7':
121 				/* ANSI C rules: up to 3 octal digits */
122 				d = c - '0';
123 				if ((c = getc(f)) >= '0' && c <= '7') {
124 					d = (d<<3) + (c-'0');
125 					if ((c = getc(f)) >= '0' && c <= '7')
126 						d = (d<<3) + (c-'0');
127 					else
128 						ungetc(c, f);
129 				} else {
130 					ungetc(c, f);
131 				}
132 				*p++ = d;
133 				break;
134 			  default:
135 				error("illegal escape sequence: ");
136 				fprintf(stderr, "'\\%c'\n", c);
137 				break;
138 			}
139 			state = INSTRING;
140 			break;
141 		  case IDENT:		/* language identifier */
142 			if (c == ':') {
143 				state = NEWLINE;
144 				break;
145 			}
146 			if (c == '\n' && strncmp(lang, "No translation", 14) == 0)
147 			{
148 				i = 0;
149 				state = NEWLINE;
150 				break;
151 			}
152 			lang[i++] = c;
153 			if (i == 15 || !isalnum(c) && !isspace(c)) {
154 				lang[i] = '\0';
155 				error("bad language identifier\n");
156 				state = ERROR;
157 				i = 0;
158 			}
159 			break;
160 		  case DONE:
161 			if (c == '\n') {
162 				lang[i] = '\0';
163 				*p = '\0';
164 				return(buf);
165 			}
166 			if (!isspace(c)) {
167 				error("extra characters after '\"'\n");
168 				state = ERROR;
169 			}
170 			break;
171 		  case ERROR:
172 			if (c == '\n')
173 				state = ERR1;
174 			break;
175 		  case ERR1:
176 			state = (c == '\n' ? NEWLINE : ERROR);
177 			break;
178 		}
179 	}
180 	if (state != NEWLINE)
181 		error("unexpected EOF\n");
182 	return(NULL);
183 }
184 
185 
/*
 * One index entry per translated message.  indx_tbl is the in-memory
 * table, either read from the index file or built by make_indexfile.
 */
static struct indx_ent {
	word32	crc;		/* message_crc() of the English message text */
	long	offset;		/* position in the language file where the translation starts */
} *indx_tbl = NULL;

static int max_msgs = 0;	/* entries allocated for indx_tbl by make_indexfile */
static int nmsg = 0;		/* entries of indx_tbl currently in use */

static FILE *langf;		/* the open language file */
195 
/*
 * Header of the index file.  It is written and read as a raw struct and
 * is followed in the file by nmsg struct indx_ent records.
 */
static struct {
	long lang_fsize;	/* size of language.txt */
	char lang[16];		/* language identifier */
	int nmsg;		/* number of messages */
} indx_hdr;
201 
202 
203 static int make_indexfile(char *);
204 word32 crcupdate(byte, word32);
205 void init_crc();
206 
207 /*
208  * uses 24-bit CRC function from armor.c
209  */
210 static word32
211 message_crc(char *s)
212 {
213 	word32 crc = 0;
214 
215 	while (*s)
216 		crc = crcupdate(*s++, crc);
217 	return(crc);
218 }
219 
220 /*
221  * lookup file offset in indx_tbl
222  */
223 static long
224 lookup_offset(word32 crc)
225 {
226 	int i;
227 
228 	for (i = 0; i < nmsg; ++i)
229 		if (indx_tbl[i].crc == crc)
230 			return(indx_tbl[i].offset);
231 	return(-1);
232 }
233 
234 
235 
236 #ifndef LANGTOOL
237 static void init_lang();
238 
239 /*
240  * return foreign translation of s
241  */
242 char *
243 PSTR (char *s)
244 {
245 	long filepos;
246 
247 	if (subtitles_available == 0)
248 		init_lang();
249 	if (subtitles_available < 0)
250 		return(s);
251 
252 	filepos = lookup_offset(message_crc(s));
253 	if (filepos == -1) {
254 		return(s);
255 	} else {
256 		fseek(langf, filepos, SEEK_SET);
257 		readstr(langf, strbuf, 1);
258 	}
259 
260 	if (strbuf[0] == '\0')
261 		return(s);
262 
263 	for (s = strbuf; *s; ++s)
264 		*s = EXT_C(*s);
265 	return(strbuf);
266 }
267 
268 /*
269  * initialize the index table: read it from language.idx or create
270  * a new one and write it to the index file. A new index file is
271  * created if the language set in config.pgp doesn't match the one
272  * in language.idx or if the size of language.txt has changed.
273  */
274 static void
275 init_lang()
276 {
277 	char indexfile[MAX_PATH];
278 	char subtitles_file[MAX_PATH];
279 	FILE *indexf;
280 
281 	if (strcmp(language, "en") == 0) {
282 		subtitles_available = -1;
283 		return;		/* use default messages */
284 	}
285 
286 	buildfilename (subtitles_file, langfile);
287 	if ((langf = fopen(subtitles_file, "rb")) == NULL) {
288 		subtitles_available = -1;
289 		return;
290 	}
291 	init_crc();
292 	if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) {
293 		fprintf(stderr, "Not enough memory for foreign subtitles\n");
294 		fclose(langf);
295 		subtitles_available = -1;
296 		return;
297 	}
298 	buildfilename(indexfile, LANG_INDEXFILE);
299 	if ((indexf = fopen(indexfile, "rb")) != NULL) {
300 		if (fread(&indx_hdr, sizeof(indx_hdr), 1, indexf) == 1 &&
301 			indx_hdr.lang_fsize == fsize(langf) &&
302 			strcmp(indx_hdr.lang, language) == 0)
303 		{
304 			nmsg = indx_hdr.nmsg;
305 			indx_tbl = (struct indx_ent *) malloc(nmsg * sizeof(struct indx_ent));
306 			if (indx_tbl == NULL) {
307 				fprintf(stderr, "Not enough memory for foreign subtitles\n");
308 				fclose(indexf);
309 				fclose(langf);
310 				subtitles_available = -1;
311 				return;
312 			}
313 			if (fread(indx_tbl, sizeof(struct indx_ent), nmsg, indexf) != nmsg)
314 			{
315 				free(indx_tbl);	/* create a new one */
316 				indx_tbl = NULL;
317 			}
318 		}
319 		fclose(indexf);
320 	}
321 	if (indx_tbl == NULL && make_indexfile(indexfile) < 0) {
322 		fclose(langf);
323 		subtitles_available = -1;
324 	} else {
325 		subtitles_available = 1;
326 	}
327 }
328 #endif /* !LANGTOOL */
329 
330 
331 /*
332  * build the index table in memory, and if indexfile is not NULL,
333  * write it to this file
334  */
335 static int
336 make_indexfile(char *indexfile)
337 {
338 	FILE *indexf;
339 	long filepos;
340 	int total_msgs = 0;
341 	char *res;
342 
343 	rewind(langf);
344 	indx_hdr.lang_fsize = fsize(langf);
345 	strncpy(indx_hdr.lang, language, 15);
346 	init_crc();
347 	line = 1;
348 	nmsg = 0;
349 	while (readstr(langf, strbuf, 0)) {
350 		if (nmsg == max_msgs) {
351 			if (max_msgs) {
352 				max_msgs *= 2;
353 				indx_tbl = (struct indx_ent *) realloc(indx_tbl, max_msgs *
354 							sizeof(struct indx_ent));
355 			} else {
356 				max_msgs = 400;
357 				indx_tbl = (struct indx_ent *) malloc(max_msgs *
358 							sizeof(struct indx_ent));
359 			}
360 			if (indx_tbl == NULL) {
361 				fprintf(stderr, "Not enough memory for foreign subtitles\n");
362 				return(-1);
363 			}
364 		}
365 		++total_msgs;
366 		indx_tbl[nmsg].crc = message_crc(strbuf);
367 		if (lookup_offset(indx_tbl[nmsg].crc) != -1)
368 			error("message CRC not unique.\n");
369 		do {
370 			filepos = ftell(langf);
371 			res = readstr (langf, strbuf, 1);		/* Abort if find newline first */
372 			if (*language == '\0')		/* use first language found */
373 				strcpy(language, lang);
374 		} while (res && strbuf[0] != '\0' && strcmp(language, lang) != 0);
375 
376 		if (res == NULL)
377 			break;
378 		if (strbuf[0] == '\0')	/* No translation */
379 			continue;
380 
381 		indx_tbl[nmsg].offset = filepos;
382 		++nmsg;
383 		do
384 			res = readstr (langf, strbuf, 1);		/* Abort if find newline first */
385 		while (res && strbuf[0] != '\0');
386 	}
387 	line = 0;
388 	indx_hdr.nmsg = nmsg;
389 	if (verbose)
390 		fprintf(stderr, "%s: %d messages, %d translations for language \"%s\"\n",
391 				langfile, total_msgs, nmsg, language);
392 	if (nmsg == 0) {
393 		fprintf(stderr, "No translations available for language \"%s\"\n\n",
394 				language);
395 		return(-1);
396 	}
397 
398 	if (indexfile) {
399 		if ((indexf = fopen(indexfile, "wb")) == NULL)
400 			fprintf(stderr, "Cannot create %s\n", indexfile);
401 		else {
402 			fwrite(&indx_hdr, 1, sizeof(indx_hdr), indexf);
403 			fwrite(indx_tbl, sizeof(struct indx_ent), nmsg, indexf);
404 			if (ferror(indexf) || fclose(indexf))
405 				fprintf(stderr, "error writing %s\n", indexfile);
406 		}
407 	}
408 	return(0);
409 }
410 
411 static void
412 error(char *s)
413 {
414 	++errcount;
415 	if (langfile[0])
416 		fprintf(stderr, "%s:", langfile);
417 	if (line)
418 		fprintf(stderr, "%d:", line);
419 	fprintf(stderr, " %s", s);
420 }
421 
422 #ifdef LANGTOOL
423 /*
424  * language string tool for manipulating language files
425  * link with CRC routines from armor.c
426  */
427 
428 #define CMD_EXTRACT	1
429 #define CMD_CHECK	2
430 #define CMD_MERGE	3
431 
432 extern char *optarg;
433 extern int optind;
434 
435 main(int argc, char **argv)
436 {
437 	int opt, cmd = 0, rc = 0;
438 	char *langIDs[16];
439 	char *outfile = NULL;
440 
441 	init_crc();
442 	if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) {
443 		perror(argv[0]);
444 		exit(1);
445 	}
446 	while ((opt = getopt(argc, argv, "cxmo:")) != EOF) {
447 		switch (opt) {
448 		  case 'c': cmd = CMD_CHECK; break;
449 		  case 'x': cmd = CMD_EXTRACT; break;
450 		  case 'm': cmd = CMD_MERGE; break;
451 		  case 'o': outfile = optarg; break;
452 		  default: usage();
453 		}
454 	}
455 	argc -= optind; argv += optind;
456 	switch (cmd) {
457 		case CMD_EXTRACT:
458 			if (argc < 2)
459 				usage();
460 			rc = extract(argv[0], outfile, &argv[1]);
461 			break;
462 		case CMD_MERGE:
463 			if (argc < 2)
464 				usage();
465 			rc = merge(argv[0], argv[1], outfile, argv[2]);
466 			break;
467 		case CMD_CHECK:
468 			verbose = 1;
469 			if (argc == 0)
470 				checkfile("language.txt");
471 			else
472 				while (--argc >= 0)
473 					checkfile(*argv++);
474 			break;
475 		default: usage();
476 	}
477 	exit(rc);
478 }
479 
/*
 * Print the command line synopsis of langtool on stderr and terminate
 * the program with exit status 1.  Does not return.
 */
int
usage(void)
{
	fputs("usage: langtool -[x|c|m] [-o outputfile] ...\n\n"
		"To extract one or more languages from a merged file:\n"
		"\tlangtool -x [-o outputfile] file langID...\n\n"
		"To check a language file for syntax errors:\n"
		"\tlangtool -c file...\n\n"
		"To merge language \"lang\" from lang_file with source_file:\n"
		"\tlangtool -m [-o outputfile] source_file lang_file [lang]\n",
		stderr);
	exit(1);
	return 1;	/* not reached */
}
491 
492 merge(char *base_file, char *lang_file, char *outfile, char *langID)
493 {
494 	FILE *fp, *outf;
495 	long fpos = 0, filepos;
496 	int newmsgs = 0;
497 
498 	if ((langf = fopen(lang_file, "r")) == NULL) {
499 		perror(lang_file);
500 		return -1;
501 	}
502 	strcpy(langfile, lang_file);
503 	if (langID)
504 		strcpy(language, langID);
505 	else
506 		language[0] = '\0';	/* use first language found */
507 
508 	errcount = 0;
509 	make_indexfile(NULL);
510 	if (errcount)
511 		return -1;
512 
513 	langfile[0] = '\0';	/* don't print filename in error msgs */
514 
515 	if ((fp = fopen(base_file, "r")) == NULL) {
516 		perror(base_file);
517 		return -1;
518 	}
519 	if (outfile == NULL)
520 		outf = stdout;
521 	else {
522 		if ((outf = fopen(outfile, "w")) == NULL) {
523 			perror(outfile);
524 			return(-1);
525 		}
526 	}
527 
528 	while (readstr(fp, strbuf, 0)) {
529 		copypos(fp, outf, fpos);
530 		fpos = ftell(fp);
531 
532 		filepos = lookup_offset(message_crc(strbuf));
533 		if (filepos == -1) {
534 			fprintf(outf, "No translation\n");
535 			++newmsgs;
536 		} else {
537 			fseek(langf, filepos, SEEK_SET);
538 			readstr(langf, strbuf, 1);
539 			copypos(langf, outf, filepos);
540 		}
541 
542 		while (readstr(fp, strbuf, 1))
543 			if (*strbuf == '\0')
544 				break;
545 	}
546 	copypos(fp, outf, fpos);
547 	fflush(outf);
548 	if (ferror(outf)) {
549 		perror(outfile);
550 		return -1;
551 	}
552 	if (newmsgs)
553 		fprintf(stderr, "%d untranslated messages\n", newmsgs);
554 	return errcount;
555 }
556 
557 extract(char *infile, char *outfile, char **langIDs)
558 {
559 	FILE *fp, *outf;
560 	long fpos = 0;
561 	char **langID;
562 
563 	if ((fp = fopen(infile, "r")) == NULL) {
564 		perror(infile);
565 		return -1;
566 	}
567 	if (outfile == NULL) {
568 		outf = stdout;
569 	} else {
570 		if ((outf = fopen(outfile, "w")) == NULL) {
571 			perror(outfile);
572 			fclose(fp);
573 			return(-1);
574 		}
575 	}
576 
577 	while (readstr(fp, strbuf, 0)) {
578 		copypos(fp, outf, fpos);
579 		fpos = ftell(fp);
580 		while (readstr(fp, strbuf, 1)) {
581 			if (*strbuf == '\0')
582 				break;
583 			for (langID = langIDs; *langID; ++langID) {
584 				if (strcmp(lang, *langID) == 0)
585 					copypos(fp, outf, fpos);
586 			}
587 			fpos = ftell(fp);
588 		}
589 	}
590 	copypos(fp, outf, fpos);
591 	fflush(outf);
592 	if (ferror(outf)) {
593 		perror(outfile);
594 		return -1;
595 	}
596 	return 0;
597 }
598 
599 checkfile(char *name)
600 {
601 	if ((langf = fopen(name, "rb")) == NULL) {
602 		perror(name);
603 		return -1;
604 	}
605 	strcpy(langfile, name);
606 	language[0] = '\0';	/* count messages for first language */
607 	errcount = 0;
608 	make_indexfile(NULL);
609 	fclose(langf);
610 	return errcount;
611 }
612 
int copyfile(FILE *f, FILE *g, long n);

/*
 * Copy the part of f between offset pos and the current file position
 * to g.  f is repositioned to pos first, so after a complete copy it is
 * back at the position it had on entry.  Always returns 0.
 */
int
copypos(FILE *f, FILE *g, long pos)
{
	long size;

	size = ftell(f) - pos;
	fseek(f, pos, SEEK_SET);
	copyfile(f, g, size);
	return 0;
}
620 
/*
 * Copy at most n bytes from the current position of f to g, stopping
 * early at end of file.  A non-positive n copies nothing.
 * Always returns 0; write errors are left for the caller to detect
 * with ferror(g).
 */
int
copyfile(FILE *f, FILE *g, long n)
{
	int c;

	while (--n >= 0 && (c = getc(f)) != EOF)
		putc(c, g);
	return 0;
}
628 
/*
 * Return the size of the open file f as reported by ftell() at its
 * end.  The file position that f had on entry is restored afterwards.
 */
long
fsize(FILE *f)
{
	long saved = ftell(f);
	long total;

	fseek(f, 0L, SEEK_END);
	total = ftell(f);
	fseek(f, saved, SEEK_SET);
	return total;
}
639 #endif /* LANGTOOL */
640