1 /* 2 * language.c - Foreign language translation for PGP 3 * Finds foreign language "subtitles" for English phrases 4 * in external foriegn language text file. 5 */ 6 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <ctype.h> 11 #include "usuals.h" 12 #ifndef LANGTOOL 13 #include "fileio.h" 14 #include "language.h" 15 #include "pgp.h" 16 #else 17 #define MAX_PATH 255 18 boolean verbose; 19 long fsize(); 20 #endif 21 22 char langfile[80] = "language.txt"; 23 #define LANG_INDEXFILE "language.idx" 24 25 #define STRBUFSIZE 2048 26 27 char language[16] = "en"; /* The language code, defaults to English */ 28 static char *strbuf; 29 static char lang[16]; /* readstr sets this to the language id of the msg it last read */ 30 static int subtitles_available = 0; 31 static int line = 0; 32 static int errcount = 0; 33 /* subtitles_available is used to determine if we know whether the special 34 subtitles_file exists. subtitles_available has the following values: 35 0 = first time thru, we don't yet know if subtitles_file exists. 36 1 = we have already determined that subtitles_file exists. 37 -1 = we have already determined that subtitles_file does not exist. 38 */ 39 40 static void error(char *); 41 42 #define NEWLINE 0 43 #define COMMENT 1 44 #define INSTRING 2 45 #define ESCAPE 3 46 #define IDENT 4 47 #define DONE 5 48 #define ERROR 6 49 #define ERR1 7 50 51 /* Look for and return a quoted string from the file. 52 * If nlabort is true, return failure if we find a blank line 53 * before we find the opening quote. 54 */ 55 static char * 56 readstr (FILE *f, char *buf, int nlabort) 57 { 58 int c, d; 59 char *p = buf; 60 int state = NEWLINE; 61 int i = 0; 62 63 while ((c = getc(f)) != EOF) { 64 if (c == '\r') 65 continue; 66 /* line numbers are only incremented when creating index file */ 67 if (line && c == '\n') 68 ++line; 69 switch (state) { 70 case NEWLINE: 71 switch(c) { 72 case '#': state = COMMENT; break; 73 case '"': state = INSTRING; break; 74 case '\n': 75 if (nlabort) { 76 *buf = '\0'; 77 return(buf); 78 } 79 default: 80 if (i == 0 && isalnum(c)) { 81 state = IDENT; 82 lang[i++] = c; 83 break; 84 } 85 if (!isspace(c)) { 86 error("syntax error\n"); 87 state = ERROR; 88 } 89 } 90 break; 91 case COMMENT: 92 if (c == '\n') 93 state = NEWLINE; 94 break; 95 case INSTRING: 96 switch(c) { 97 case '\\': state = ESCAPE; break; 98 case '"': state = DONE; break; 99 default: *p++ = c; 100 } 101 break; 102 case ESCAPE: 103 switch (c) { 104 case 'n': *p++ = '\n'; break; 105 case 'r': *p++ = '\r'; break; 106 case 't': *p++ = '\t'; break; 107 case 'e': *p++ = '\033'; break; 108 case 'a': *p++ = '\007'; break; 109 case '#': 110 case '"': 111 case '\\': *p++ = c; break; 112 case '\n': break; 113 case '0': 114 case '1': 115 case '2': 116 case '3': 117 case '4': 118 case '5': 119 case '6': 120 case '7': 121 /* ANSI C rules: up to 3 octal digits */ 122 d = c - '0'; 123 if ((c = getc(f)) >= '0' && c <= '7') { 124 d = (d<<3) + (c-'0'); 125 if ((c = getc(f)) >= '0' && c <= '7') 126 d = (d<<3) + (c-'0'); 127 else 128 ungetc(c, f); 129 } else { 130 ungetc(c, f); 131 } 132 *p++ = d; 133 break; 134 default: 135 error("illegal escape sequence: "); 136 fprintf(stderr, "'\\%c'\n", c); 137 break; 138 } 139 state = INSTRING; 140 break; 141 case IDENT: /* language identifier */ 142 if (c == ':') { 143 state = NEWLINE; 144 break; 145 } 146 if (c == '\n' && strncmp(lang, "No translation", 14) == 0) 147 { 148 i = 0; 149 state = NEWLINE; 150 break; 151 } 152 lang[i++] = c; 153 if (i == 15 || !isalnum(c) && !isspace(c)) { 154 lang[i] = '\0'; 155 error("bad language identifier\n"); 156 state = ERROR; 157 i = 0; 158 } 159 break; 160 case DONE: 161 if (c == '\n') { 162 lang[i] = '\0'; 163 *p = '\0'; 164 return(buf); 165 } 166 if (!isspace(c)) { 167 error("extra characters after '\"'\n"); 168 state = ERROR; 169 } 170 break; 171 case ERROR: 172 if (c == '\n') 173 state = ERR1; 174 break; 175 case ERR1: 176 state = (c == '\n' ? NEWLINE : ERROR); 177 break; 178 } 179 } 180 if (state != NEWLINE) 181 error("unexpected EOF\n"); 182 return(NULL); 183 } 184 185 186 static struct indx_ent { 187 word32 crc; 188 long offset; 189 } *indx_tbl = NULL; 190 191 static int max_msgs = 0; 192 static int nmsg = 0; 193 194 static FILE *langf; 195 196 static struct { 197 long lang_fsize; /* size of language.txt */ 198 char lang[16]; /* language identifier */ 199 int nmsg; /* number of messages */ 200 } indx_hdr; 201 202 203 static int make_indexfile(char *); 204 word32 crcupdate(byte, word32); 205 void init_crc(); 206 207 /* 208 * uses 24-bit CRC function from armor.c 209 */ 210 static word32 211 message_crc(char *s) 212 { 213 word32 crc = 0; 214 215 while (*s) 216 crc = crcupdate(*s++, crc); 217 return(crc); 218 } 219 220 /* 221 * lookup file offset in indx_tbl 222 */ 223 static long 224 lookup_offset(word32 crc) 225 { 226 int i; 227 228 for (i = 0; i < nmsg; ++i) 229 if (indx_tbl[i].crc == crc) 230 return(indx_tbl[i].offset); 231 return(-1); 232 } 233 234 235 236 #ifndef LANGTOOL 237 static void init_lang(); 238 239 /* 240 * return foreign translation of s 241 */ 242 char * 243 PSTR (char *s) 244 { 245 long filepos; 246 247 if (subtitles_available == 0) 248 init_lang(); 249 if (subtitles_available < 0) 250 return(s); 251 252 filepos = lookup_offset(message_crc(s)); 253 if (filepos == -1) { 254 return(s); 255 } else { 256 fseek(langf, filepos, SEEK_SET); 257 readstr(langf, strbuf, 1); 258 } 259 260 if (strbuf[0] == '\0') 261 return(s); 262 263 for (s = strbuf; *s; ++s) 264 *s = EXT_C(*s); 265 return(strbuf); 266 } 267 268 /* 269 * initialize the index table: read it from language.idx or create 270 * a new one and write it to the index file. A new index file is 271 * created if the language set in config.pgp doesn't match the one 272 * in language.idx or if the size of language.txt has changed. 273 */ 274 static void 275 init_lang() 276 { 277 char indexfile[MAX_PATH]; 278 char subtitles_file[MAX_PATH]; 279 FILE *indexf; 280 281 if (strcmp(language, "en") == 0) { 282 subtitles_available = -1; 283 return; /* use default messages */ 284 } 285 286 buildfilename (subtitles_file, langfile); 287 if ((langf = fopen(subtitles_file, "rb")) == NULL) { 288 subtitles_available = -1; 289 return; 290 } 291 init_crc(); 292 if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) { 293 fprintf(stderr, "Not enough memory for foreign subtitles\n"); 294 fclose(langf); 295 subtitles_available = -1; 296 return; 297 } 298 buildfilename(indexfile, LANG_INDEXFILE); 299 if ((indexf = fopen(indexfile, "rb")) != NULL) { 300 if (fread(&indx_hdr, sizeof(indx_hdr), 1, indexf) == 1 && 301 indx_hdr.lang_fsize == fsize(langf) && 302 strcmp(indx_hdr.lang, language) == 0) 303 { 304 nmsg = indx_hdr.nmsg; 305 indx_tbl = (struct indx_ent *) malloc(nmsg * sizeof(struct indx_ent)); 306 if (indx_tbl == NULL) { 307 fprintf(stderr, "Not enough memory for foreign subtitles\n"); 308 fclose(indexf); 309 fclose(langf); 310 subtitles_available = -1; 311 return; 312 } 313 if (fread(indx_tbl, sizeof(struct indx_ent), nmsg, indexf) != nmsg) 314 { 315 free(indx_tbl); /* create a new one */ 316 indx_tbl = NULL; 317 } 318 } 319 fclose(indexf); 320 } 321 if (indx_tbl == NULL && make_indexfile(indexfile) < 0) { 322 fclose(langf); 323 subtitles_available = -1; 324 } else { 325 subtitles_available = 1; 326 } 327 } 328 #endif /* !LANGTOOL */ 329 330 331 /* 332 * build the index table in memory, and if indexfile is not NULL, 333 * write it to this file 334 */ 335 static int 336 make_indexfile(char *indexfile) 337 { 338 FILE *indexf; 339 long filepos; 340 int total_msgs = 0; 341 char *res; 342 343 rewind(langf); 344 indx_hdr.lang_fsize = fsize(langf); 345 strncpy(indx_hdr.lang, language, 15); 346 init_crc(); 347 line = 1; 348 nmsg = 0; 349 while (readstr(langf, strbuf, 0)) { 350 if (nmsg == max_msgs) { 351 if (max_msgs) { 352 max_msgs *= 2; 353 indx_tbl = (struct indx_ent *) realloc(indx_tbl, max_msgs * 354 sizeof(struct indx_ent)); 355 } else { 356 max_msgs = 400; 357 indx_tbl = (struct indx_ent *) malloc(max_msgs * 358 sizeof(struct indx_ent)); 359 } 360 if (indx_tbl == NULL) { 361 fprintf(stderr, "Not enough memory for foreign subtitles\n"); 362 return(-1); 363 } 364 } 365 ++total_msgs; 366 indx_tbl[nmsg].crc = message_crc(strbuf); 367 if (lookup_offset(indx_tbl[nmsg].crc) != -1) 368 error("message CRC not unique.\n"); 369 do { 370 filepos = ftell(langf); 371 res = readstr (langf, strbuf, 1); /* Abort if find newline first */ 372 if (*language == '\0') /* use first language found */ 373 strcpy(language, lang); 374 } while (res && strbuf[0] != '\0' && strcmp(language, lang) != 0); 375 376 if (res == NULL) 377 break; 378 if (strbuf[0] == '\0') /* No translation */ 379 continue; 380 381 indx_tbl[nmsg].offset = filepos; 382 ++nmsg; 383 do 384 res = readstr (langf, strbuf, 1); /* Abort if find newline first */ 385 while (res && strbuf[0] != '\0'); 386 } 387 line = 0; 388 indx_hdr.nmsg = nmsg; 389 if (verbose) 390 fprintf(stderr, "%s: %d messages, %d translations for language \"%s\"\n", 391 langfile, total_msgs, nmsg, language); 392 if (nmsg == 0) { 393 fprintf(stderr, "No translations available for language \"%s\"\n\n", 394 language); 395 return(-1); 396 } 397 398 if (indexfile) { 399 if ((indexf = fopen(indexfile, "wb")) == NULL) 400 fprintf(stderr, "Cannot create %s\n", indexfile); 401 else { 402 fwrite(&indx_hdr, 1, sizeof(indx_hdr), indexf); 403 fwrite(indx_tbl, sizeof(struct indx_ent), nmsg, indexf); 404 if (ferror(indexf) || fclose(indexf)) 405 fprintf(stderr, "error writing %s\n", indexfile); 406 } 407 } 408 return(0); 409 } 410 411 static void 412 error(char *s) 413 { 414 ++errcount; 415 if (langfile[0]) 416 fprintf(stderr, "%s:", langfile); 417 if (line) 418 fprintf(stderr, "%d:", line); 419 fprintf(stderr, " %s", s); 420 } 421 422 #ifdef LANGTOOL 423 /* 424 * language string tool for manipulating language files 425 * link with CRC routines from armor.c 426 */ 427 428 #define CMD_EXTRACT 1 429 #define CMD_CHECK 2 430 #define CMD_MERGE 3 431 432 extern char *optarg; 433 extern int optind; 434 435 main(int argc, char **argv) 436 { 437 int opt, cmd = 0, rc = 0; 438 char *langIDs[16]; 439 char *outfile = NULL; 440 441 init_crc(); 442 if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) { 443 perror(argv[0]); 444 exit(1); 445 } 446 while ((opt = getopt(argc, argv, "cxmo:")) != EOF) { 447 switch (opt) { 448 case 'c': cmd = CMD_CHECK; break; 449 case 'x': cmd = CMD_EXTRACT; break; 450 case 'm': cmd = CMD_MERGE; break; 451 case 'o': outfile = optarg; break; 452 default: usage(); 453 } 454 } 455 argc -= optind; argv += optind; 456 switch (cmd) { 457 case CMD_EXTRACT: 458 if (argc < 2) 459 usage(); 460 rc = extract(argv[0], outfile, &argv[1]); 461 break; 462 case CMD_MERGE: 463 if (argc < 2) 464 usage(); 465 rc = merge(argv[0], argv[1], outfile, argv[2]); 466 break; 467 case CMD_CHECK: 468 verbose = 1; 469 if (argc == 0) 470 checkfile("language.txt"); 471 else 472 while (--argc >= 0) 473 checkfile(*argv++); 474 break; 475 default: usage(); 476 } 477 exit(rc); 478 } 479 480 usage() 481 { 482 fprintf(stderr, "usage: langtool -[x|c|m] [-o outputfile] ...\n\n\ 483 To extract one or more languages from a merged file:\n\ 484 langtool -x [-o outputfile] file langID...\n\n\ 485 To check a language file for syntax errors:\n\ 486 langtool -c file...\n\n\ 487 To merge language \"lang\" from lang_file with source_file:\n\ 488 langtool -m [-o outputfile] source_file lang_file [lang]\n"); 489 exit(1); 490 } 491 492 merge(char *base_file, char *lang_file, char *outfile, char *langID) 493 { 494 FILE *fp, *outf; 495 long fpos = 0, filepos; 496 int newmsgs = 0; 497 498 if ((langf = fopen(lang_file, "r")) == NULL) { 499 perror(lang_file); 500 return -1; 501 } 502 strcpy(langfile, lang_file); 503 if (langID) 504 strcpy(language, langID); 505 else 506 language[0] = '\0'; /* use first language found */ 507 508 errcount = 0; 509 make_indexfile(NULL); 510 if (errcount) 511 return -1; 512 513 langfile[0] = '\0'; /* don't print filename in error msgs */ 514 515 if ((fp = fopen(base_file, "r")) == NULL) { 516 perror(base_file); 517 return -1; 518 } 519 if (outfile == NULL) 520 outf = stdout; 521 else { 522 if ((outf = fopen(outfile, "w")) == NULL) { 523 perror(outfile); 524 return(-1); 525 } 526 } 527 528 while (readstr(fp, strbuf, 0)) { 529 copypos(fp, outf, fpos); 530 fpos = ftell(fp); 531 532 filepos = lookup_offset(message_crc(strbuf)); 533 if (filepos == -1) { 534 fprintf(outf, "No translation\n"); 535 ++newmsgs; 536 } else { 537 fseek(langf, filepos, SEEK_SET); 538 readstr(langf, strbuf, 1); 539 copypos(langf, outf, filepos); 540 } 541 542 while (readstr(fp, strbuf, 1)) 543 if (*strbuf == '\0') 544 break; 545 } 546 copypos(fp, outf, fpos); 547 fflush(outf); 548 if (ferror(outf)) { 549 perror(outfile); 550 return -1; 551 } 552 if (newmsgs) 553 fprintf(stderr, "%d untranslated messages\n", newmsgs); 554 return errcount; 555 } 556 557 extract(char *infile, char *outfile, char **langIDs) 558 { 559 FILE *fp, *outf; 560 long fpos = 0; 561 char **langID; 562 563 if ((fp = fopen(infile, "r")) == NULL) { 564 perror(infile); 565 return -1; 566 } 567 if (outfile == NULL) { 568 outf = stdout; 569 } else { 570 if ((outf = fopen(outfile, "w")) == NULL) { 571 perror(outfile); 572 fclose(fp); 573 return(-1); 574 } 575 } 576 577 while (readstr(fp, strbuf, 0)) { 578 copypos(fp, outf, fpos); 579 fpos = ftell(fp); 580 while (readstr(fp, strbuf, 1)) { 581 if (*strbuf == '\0') 582 break; 583 for (langID = langIDs; *langID; ++langID) { 584 if (strcmp(lang, *langID) == 0) 585 copypos(fp, outf, fpos); 586 } 587 fpos = ftell(fp); 588 } 589 } 590 copypos(fp, outf, fpos); 591 fflush(outf); 592 if (ferror(outf)) { 593 perror(outfile); 594 return -1; 595 } 596 return 0; 597 } 598 599 checkfile(char *name) 600 { 601 if ((langf = fopen(name, "rb")) == NULL) { 602 perror(name); 603 return -1; 604 } 605 strcpy(langfile, name); 606 language[0] = '\0'; /* count messages for first language */ 607 errcount = 0; 608 make_indexfile(NULL); 609 fclose(langf); 610 return errcount; 611 } 612 613 copypos(FILE *f, FILE *g, long pos) 614 { 615 long size; 616 size = ftell(f) - pos; 617 fseek(f, pos, SEEK_SET); 618 copyfile(f, g, size); 619 } 620 621 copyfile(FILE *f, FILE *g, long n) 622 { 623 int c; 624 625 while (--n >= 0 && (c = getc(f)) != EOF) 626 putc(c, g); 627 } 628 629 long 630 fsize(FILE *f) 631 { 632 long len, pos; 633 pos = ftell(f); 634 fseek(f, 0L, SEEK_END); 635 len = ftell(f); 636 fseek(f, pos, SEEK_SET); 637 return len; 638 } 639 #endif /* LANGTOOL */ 640