1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <unistd.h>
4 #include <string.h>
5 #include <locale.h>
6
7 #include <errno.h>
8
9 #include "config.h"
10
11 #ifdef HAVE_SYS_STAT_H
12 # include <sys/stat.h>
13 #endif /* HAVE_SYS_STAT_H */
14 #ifdef HAVE_SYS_FILE_H
15 # include <sys/file.h>
16 #endif /* HAVE_SYS_FILE_H */
17 #ifdef HAVE_SYS_TYPES_H
18 # include <sys/types.h>
19 #endif /* HAVE_SYS_TYPES_H */
20 #ifdef HAVE_DIRENT_H
21 # include <dirent.h>
22 #endif /* HAVE_DIRENT_H */
23
24 #ifdef HAVE_GETOPT_H
25 # include <getopt.h>
26 #endif /* HAVE_GETOPT_H */
27
28 #include <librcc.h>
29
/*
 * Compatibility shim: when no macro with the correctly spelled name is
 * visible, map it onto the misspelled RCC_OPTION_TRANSLATE_SKIP_PARRENT.
 * NOTE(review): presumably needed for librcc headers that only provide the
 * misspelled constant - confirm against the installed librcc.h.
 */
#if !defined(RCC_OPTION_TRANSLATE_SKIP_PARENT)
# define RCC_OPTION_TRANSLATE_SKIP_PARENT RCC_OPTION_TRANSLATE_SKIP_PARRENT
#endif
33
/*
 * Operating modes. The values start at 0x1000; getopt_long() writes them
 * into `mode` through the flag pointer of the matching long option.
 */
typedef enum {
    MODE_STDIN     = 0x1000,
    MODE_DIRECTORY = 0x1001,
    MODE_FILE      = 0x1002,
    MODE_FILELIST  = 0x1003
} Modes;

/* Selected operating mode; standard input is processed unless told otherwise. */
int mode = MODE_STDIN;
42
/*
 * Identifiers returned by getopt_long(). Options that also have a short
 * form use the option letter itself; the long-only options simply continue
 * the numbering after OPT_YES.
 */
typedef enum {
    /* options with a one-letter form */
    OPT_CONFIG       = 'c',
    OPT_ENCODING_IN  = 'e',
    OPT_FROM         = 'f',
    OPT_HELP         = 'h',
    OPT_LANGUAGE_IN  = 'l',
    OPT_TO           = 't',
    OPT_YES          = 'y',

    /* long-only options */
    OPT_ENCODING_OUT = OPT_YES + 1,
    OPT_LANGUAGE_OUT,
    OPT_TRANSLATION,
    OPT_CACHING,
    OPT_CACHE,
    OPT_AUTODETECT,
    OPT_OFFLINE,
    OPT_TIMEOUT,
    OPT_SUBDIRS
} Options;
61
62 static struct option long_options[] = {
63 {"config", required_argument, 0, OPT_CONFIG },
64 {"from", required_argument, 0, OPT_FROM },
65 {"to", required_argument, 0, OPT_TO },
66 {"force-encoding", required_argument, 0, OPT_ENCODING_IN },
67 {"force-language", required_argument, 0, OPT_LANGUAGE_IN },
68 {"force-target-encoding", required_argument, 0, OPT_ENCODING_OUT },
69 {"force-target-language", required_argument, 0, OPT_LANGUAGE_OUT },
70 {"language-detection", required_argument, 0, OPT_AUTODETECT },
71 {"translation", optional_argument, 0, OPT_TRANSLATION },
72 {"caching", optional_argument, 0, OPT_CACHING },
73 {"cache", required_argument, 0, OPT_CACHE },
74 {"timeout", required_argument, 0, OPT_TIMEOUT },
75 {"force", no_argument, 0, OPT_YES },
76 #ifdef RCC_OPTION_OFFLINE
77 {"allow-offline-processing",no_argument, 0, OPT_OFFLINE },
78 #endif /* RCC_OPTION_OFFLINE */
79 {"disable-subdirs", no_argument, 0, OPT_SUBDIRS },
80 {"stdin", no_argument, &mode, MODE_STDIN },
81 {"directory", no_argument, &mode, MODE_DIRECTORY },
82 {"file", no_argument, &mode, MODE_FILE },
83 {"filelist", no_argument, &mode, MODE_FILELIST },
84 {"help", no_argument, 0, OPT_HELP },
85 { 0, 0, 0, 0 }
86 };
87
/*
 * Print the command-line help to stdout.
 *
 * argc is unused; argv[0] is printed as the program name.
 *
 * The help text names the translation mode "skip_parent" exactly as the
 * option parser accepts it, shows <lang> as the argument of
 * --force-target-language, and has its spelling mistakes corrected.
 */
void Usage(int argc, char *argv[]) {
    (void)argc;

    printf(
        "Usage:\n"
        " %s [options] [mode] [file|directory]\n"
        " Modes:\n"
        " --stdin - Convert stdin to stdout\n"
        " --directory - Convert file names in specified directory\n"
        " --file - Convert specified file\n"
        " --filelist - Convert all files listed on stdin\n"
        " --help - Help message\n"
        "\n"
        " Options:\n"
        " -c <config> - Specify configuration name\n"
        " -f <class> - Source class ('in' is default)\n"
        " -t <class> - Output class ('out' is default)\n"
        " -e <enc> - Force specified source encoding (autodetection)\n"
        " -l <lang> - Force specified source language (from LC_CTYPE)\n"
        " --force-target-encoding=<enc>\n"
        " - Convert to the specified encoding\n"
        " --force-target-language=<lang>\n"
        " - Translate to the specified language\n"
        " --caching=[mode]\n"
        " - Use recodings cache. Following modes are supported\n"
        " off - Turn off\n"
        " use - Use cached values (default)\n"
        " add - Add new recodings to cache\n"
        " replace - Replace encodings in cache\n"
        " --cache=<name>\n"
        " - Use specified cache database instead of default one\n"
        " --translation=[mode]\n"
        " - Enable translation. Following modes are supported:\n"
        " full - Full\n"
        " skip_parent - Skip translation to parent lang\n"
        " skip_related - Skip translation between related langs\n"
        " english - Translate to english (default)\n"
        " transliterate - Transliterate\n"
        " --language-detection=[mode]\n"
        " - Language autodetection. Following modes are supported:\n"
        " off - Current language is considered\n"
        " on - Use only configured langs (default)\n"
        " all - Try everything (slow)\n"
        " --timeout=<us>\n"
        " - Specify recoding timeout in microseconds (1s default)\n"
        "\n"
        " -y - Do not ask any question\n"
        " --disable-subdirs\n"
        " - Do not descend into the sub directories\n"
        "\n"
        " Language Relations:\n"
        " To prevent unnecessary translations the concept of related/parent languages is\n"
        " introduced. For each language you can specify a parent language.\n"
        " skip_parent translation option will turn off translation to parent language\n"
        " skip_related translation option will additionally turn off translation from\n"
        " parent language.\n"
        "\n"
        " For example, in the default configuration Russian is parent of Ukrainian, and\n"
        " English is parent of all other languages. With \"skip_parent\" option the\n"
        " translation from Russian to Ukrainian would be turned off, but translation\n"
        " from Ukrainian to Russian would operate. With \"skip_related\" option the\n"
        " translation in both directions would be disabled\n"
        "\n\n"
        " Language Detection:\n"
        " Current version uses aspell dictionaries to autodetect language. Therefore,\n"
        " only languages with aspell available in the system aspell dictionaries are\n"
        " autodetected. Beware, if your system contains a lot of installed languages,\n"
        " the autodetection may take considerable amount of time.\n"
        "\n\n",
        argv[0]);
}
157
158 /*
159 fs: is a standard class here, we do not need fs detecting here
160 */
161 static rcc_class classes[] = {
162 { "unicode", RCC_CLASS_TRANSLATE_CURRENT, "UTF-8", NULL, "Dummy", 0 },
163 { "in", RCC_CLASS_STANDARD, NULL, NULL, "Input Encoding", 0 },
164 { "out", RCC_CLASS_TRANSLATE_CURRENT, "LC_CTYPE", NULL, "Output Encoding", 0 },
165 { "id3", RCC_CLASS_STANDARD, "in", NULL, "ID3 Encoding", 0 },
166 { "id3v2", RCC_CLASS_STANDARD, "id3", NULL, "ID3 v.2 Encoding", 0},
167 { "pl", RCC_CLASS_STANDARD, "id3", NULL, "PlayList Title Encoding", 0},
168 { "plfs", RCC_CLASS_STANDARD, "pl", NULL, "PlayList File Encoding", 0 },
169 { "fs", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, "FileSystem Encoding", 0 },
170 { "oem", RCC_CLASS_STANDARD, "in", NULL, "Zip OEM Encoding", 0 },
171 { "iso", RCC_CLASS_STANDARD, "in", NULL, "Zip ISO Encoding", 0 },
172 { "ftp", RCC_CLASS_STANDARD, "in", NULL, "FTP Encoding", 0 },
173 { NULL }
174 };
175
GetClass(const char * name)176 rcc_class_id GetClass(const char *name) {
177 int i;
178
179 for (i = 1; classes[i].name; i++) {
180 if ((!strcasecmp(name, classes[i].name))||(!strcasecmp(name, classes[i].fullname)))
181 return i;
182 }
183 return (rcc_class_id)-1;
184 }
185
186 static char ask = 1;
187 static char process_subdirs = 1;
188 static rcc_language_id source_language_id, target_language_id;
189 static rcc_class_id source_class_id = 1, target_class_id = 2;
190 static char *efrom = NULL, *eto = NULL;
191
192 static int translate = RCC_OPTION_TRANSLATE_OFF;
193
194
195 char *Translate(const char *source);
196 int Stdin(const char *arg);
197 int Directory(const char *arg);
198
main(int argc,char * argv[])199 int main(int argc, char *argv[]) {
200 rcc_language_id language_id, current_language_id, english_language_id;
201
202 unsigned char c;
203
204 char *arg = NULL;
205
206 char *config_name = NULL;
207 char *cache_name = NULL;
208
209 char *from = "in";
210 char *to = "out";
211
212 unsigned char from_forced = 0;
213 unsigned char to_forced = 0;
214
215 char *lfrom = NULL;
216 char *lto = NULL;
217
218 int cache = RCC_OPTION_LEARNING_FLAG_USE;
219
220 int ldetect = 0;
221 int ldetect_all = 0;
222 int ldetect_force = 0;
223
224 unsigned long timeout = 0;
225 char offline = 0;
226
227 int option_index = 0;
228 while ((c = getopt_long(argc, argv, "yhe:f:l:t:", long_options, &option_index)) != (unsigned char)-1) {
229 switch (c) {
230 case 0:
231 break;
232 case OPT_HELP:
233 Usage(argc, argv);
234 exit(0);
235 break;
236 case OPT_CONFIG:
237 config_name = optarg;
238 break;
239 case OPT_CACHE:
240 cache_name = optarg;
241 case OPT_FROM:
242 from_forced = 1;
243 from = optarg;
244 break;
245 case OPT_TO:
246 to_forced = 1;
247 to = optarg;
248 break;
249 case OPT_ENCODING_IN:
250 efrom = optarg;
251 break;
252 case OPT_ENCODING_OUT:
253 eto = optarg;
254 break;
255 case OPT_LANGUAGE_IN:
256 lfrom = optarg;
257 /*
258 Selects main language, but for translation we can switch on
259 autodetection. Should do it manualy.
260 */
261 if (!ldetect_force) {
262 ldetect = 0;
263 ldetect_force = 1;
264 }
265
266 break;
267 case OPT_LANGUAGE_OUT:
268 lto = optarg;
269 break;
270 case OPT_TRANSLATION:
271 if (!optarg)
272 translate = RCC_OPTION_TRANSLATE_TO_ENGLISH;
273 else if (!strcasecmp(optarg, "full"))
274 translate = RCC_OPTION_TRANSLATE_FULL;
275 else if (!strcasecmp(optarg, "skip_parent"))
276 translate = RCC_OPTION_TRANSLATE_SKIP_PARENT;
277 else if (!strcasecmp(optarg, "skip_related"))
278 translate = RCC_OPTION_TRANSLATE_SKIP_RELATED;
279 else if (!strcasecmp(optarg, "english"))
280 translate = RCC_OPTION_TRANSLATE_TO_ENGLISH;
281 else if (!strcasecmp(optarg, "transliterate"))
282 translate = RCC_OPTION_TRANSLATE_TRANSLITERATE;
283 else if (!strcasecmp(optarg, "off"))
284 translate = RCC_OPTION_TRANSLATE_OFF;
285 else {
286 fprintf(stderr, "*** Unknown translation mode: %s\n\n", optarg);
287 Usage(argc, argv);
288 exit(0);
289 }
290
291 if (!ldetect_force) {
292 if (!strcasecmp(optarg, "off"))
293 ldetect = 0;
294 else
295 ldetect = 1;
296 }
297 break;
298 case OPT_CACHING:
299 if (!optarg)
300 cache = RCC_OPTION_LEARNING_FLAG_USE;
301 else if (!strcasecmp(optarg, "off"))
302 cache = 0;
303 else if (!strcasecmp(optarg, "use"))
304 cache = RCC_OPTION_LEARNING_FLAG_USE;
305 else if (!strcasecmp(optarg, "add"))
306 cache = RCC_OPTION_LEARNING_FLAG_USE|RCC_OPTION_LEARNING_FLAG_LEARN;
307 else if (!strcasecmp(optarg, "replace"))
308 cache = RCC_OPTION_LEARNING_FLAG_LEARN;
309 else {
310 fprintf(stderr, "*** Unknown caching mode: %s\n\n", optarg);
311 Usage(argc, argv);
312 exit(0);
313 }
314 break;
315 case OPT_AUTODETECT:
316 ldetect_force = 1;
317
318 if (!optarg) ldetect = 1;
319 else if (!strcasecmp(optarg, "off")) {
320 ldetect = 0;
321 ldetect_force = 1;
322 } else if (!strcasecmp(optarg, "on")) {
323 ldetect = 1;
324 ldetect_all = 0;
325 ldetect_force = 1;
326 } else if (!strcasecmp(optarg, "all")) {
327 ldetect = 1;
328 ldetect_all = 1;
329 ldetect_force = 1;
330 }
331 break;
332 case OPT_TIMEOUT:
333 timeout = atoi(optarg);
334 break;
335 case OPT_OFFLINE:
336 offline = 1;
337 break;
338 case OPT_SUBDIRS:
339 process_subdirs = 0;
340 break;
341 case OPT_YES:
342 ask = 0;
343 break;
344 default:
345 Usage(argc, argv);
346 exit(0);
347 }
348 }
349
350 if (optind < argc) {
351 if ((optind + 1) < argc) {
352 fprintf(stderr, "*** Invalid non-option arguments:\n");
353 for (;optind < argc;optind++) {
354 puts(argv[optind]);
355 }
356 fprintf(stderr, "\n\n");
357 Usage(argc,argv);
358 exit(0);
359 }
360 arg = argv[optind];
361 }
362
363 switch (mode) {
364 case MODE_DIRECTORY:
365 if (!from_forced) from = "fs";
366 if (!to_forced) to = "fs";
367 break;
368 default:
369 ;
370 }
371
372 setlocale(LC_ALL, "");
373
374
375
376 rccInit();
377 rccInitDefaultContext(NULL, 0, 0, classes, 0);
378 rccInitDb4(NULL, cache_name, 0);
379
380 if (timeout) rccSetOption(NULL, RCC_OPTION_TIMEOUT, timeout);
381
382 if (config_name) rccLoad(NULL, config_name);
383
384
385 rccSetOption(NULL, RCC_OPTION_LEARNING_MODE, cache);
386
387 if (translate != RCC_OPTION_TRANSLATE_OFF)
388 rccSetOption(NULL, RCC_OPTION_TRANSLATE, translate);
389
390 if (ldetect) {
391 rccSetOption(NULL, RCC_OPTION_AUTODETECT_LANGUAGE, 1);
392 if (ldetect_all) {
393 rccSetOption(NULL, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, 0);
394 }
395 }
396
397 // DS: More checks, sometimes we can skip that.
398 if ((lfrom)||(lto)) {
399 // if (lfrom) rccSetOption(NULL, RCC_OPTION_AUTODETECT_LANGUAGE, 1);
400 rccSetOption(NULL, RCC_OPTION_CONFIGURED_LANGUAGES_ONLY, 0);
401 }
402
403 #ifdef RCC_OPTION_OFFLINE
404 if (offline)
405 rccSetOption(NULL, RCC_OPTION_OFFLINE, 1);
406 #endif /* RCC_OPTION_OFFLINE */
407
408 if (from) {
409 source_class_id = GetClass(from);
410 if (source_class_id == (rcc_class_id)-1) {
411 rccFree();
412 fprintf(stderr, "*** Invalid source class (%s) specified\n", from);
413 exit(1);
414 }
415 }
416 if (to) {
417 target_class_id = GetClass(to);
418 if (target_class_id == (rcc_class_id)-1) {
419 rccFree();
420 fprintf(stderr, "*** Invalid target class (%s) specified\n", to);
421 exit(1);
422 }
423 }
424
425 current_language_id = rccGetCurrentLanguage(NULL);
426 english_language_id = rccGetLanguageByName(NULL, "en");
427
428 if (lfrom) {
429 source_language_id = rccGetLanguageByName(NULL, lfrom);
430 if (source_language_id == (rcc_language_id)-1) {
431 rccFree();
432 fprintf(stderr, "*** Invalid source language (%s) specified\n", lfrom);
433 exit(1);
434 }
435 } else source_language_id = current_language_id;
436
437 if (lto) {
438 target_language_id = rccGetLanguageByName(NULL, lto);
439 if (target_language_id == (rcc_language_id)-1) {
440 rccFree();
441 fprintf(stderr, "*** Invalid target language (%s) specified\n", lto);
442 exit(1);
443 }
444 } else target_language_id = current_language_id;
445
446 if (source_language_id == target_language_id) {
447 language_id = source_language_id;
448
449 if (language_id != current_language_id) {
450 if ((rccSetLanguage(NULL, language_id))||(!rccGetCurrentLanguageName(NULL))) {
451 rccFree();
452 fprintf(stderr, "*** Unable to set the specified language (%s)\n", rccGetLanguageName(NULL, language_id));
453 exit(1);
454 }
455 } else {
456 // Automatic
457 if (!rccGetCurrentLanguageName(NULL)) {
458 if (current_language_id != english_language_id) {
459 language_id = english_language_id;
460 rccSetLanguage(NULL, english_language_id);
461 }
462
463 if (!rccGetCurrentLanguageName(NULL)) {
464 rccFree();
465 fprintf(stderr, "*** Default language (%s) is not configured\n", rccGetLanguageName(NULL, current_language_id));
466 exit(1);
467 }
468 }
469 }
470
471 } else {
472 language_id = (rcc_language_id)-1;
473
474 // Checking if languages are selectable
475 if ((rccSetLanguage(NULL, source_language_id))||(!rccGetCurrentLanguageName(NULL))) {
476 rccFree();
477 fprintf(stderr, "*** Unable to set source language (%s)\n", rccGetLanguageName(NULL, source_language_id));
478 exit(1);
479 }
480 if ((rccSetLanguage(NULL, target_language_id))||(!rccGetCurrentLanguageName(NULL))) {
481 rccFree();
482 fprintf(stderr, "*** Unable to set target language (%s)\n", rccGetLanguageName(NULL, target_language_id));
483 exit(1);
484 }
485 }
486
487 switch (mode) {
488 case MODE_STDIN:
489 Stdin(arg);
490 break;
491 case MODE_DIRECTORY:
492 Directory(arg);
493 break;
494 case MODE_FILE:
495 fprintf(stderr, "*** Mode (FILE) is not supported in current version\n");
496 break;
497 case MODE_FILELIST:
498 fprintf(stderr, "*** Mode (FILELIST) is not supported in current version\n");
499 break;
500 }
501
502
503 rccFree();
504
505 return 0;
506 }
507
/*
 * Recode standard input to standard output.
 *
 * Input is read in chunks of at most sizeof(buf) - 1 bytes, so a longer
 * line is processed in several pieces.
 * DS: dynamically grow the buffer instead?
 *
 * A chunk for which Translate() returns NULL is copied through unchanged.
 * The text is written with fputs(): it is data, not a format string, so a
 * '%' in the input must not be interpreted by printf-style formatting.
 *
 * arg is unused. Always returns 0.
 */
int Stdin(const char *arg) {
    char buf[16384];
    char *res;

    (void)arg;

    while (fgets(buf, sizeof buf, stdin)) {
        res = Translate(buf);
        fputs(res ? res : buf, stdout);
        free(res);
    }

    return 0;
}
521
/*
 * Join a directory path and an entry name into a newly allocated string.
 *
 * Exactly one '/' separates the two parts: none is added when path already
 * ends with one. An empty path yields a copy of name alone; the previous
 * code read path[-1] in that case.
 *
 * Returns NULL when the allocation fails; the caller frees the result
 * with free().
 */
char *Fullname(const char *path, const char *name) {
    size_t path_len = strlen(path);
    size_t name_len = strlen(name);
    char *res;

    /* room for path, an optional separator, name and the terminating NUL */
    res = malloc(path_len + name_len + 2);
    if (!res) return NULL;

    memcpy(res, path, path_len);
    if ((path_len > 0) && (path[path_len - 1] != '/')) {
        res[path_len++] = '/';
    }
    memcpy(res + path_len, name, name_len + 1);

    return res;
}
534
535 // DS: We do not follow symbolic links (add option?)
536 // DS: Skipping everything begining with point (system files)
Directory(const char * arg)537 int Directory(const char *arg) {
538 int err;
539 struct stat st;
540
541 DIR *dir;
542 struct dirent *entry;
543 char *res;
544 char answer;
545
546 char stmp[255];
547 char *fn, *nfn;
548
549 if (!arg) arg = ".";
550
551 printf("Processing directory: %s\n", arg);
552
553 dir = opendir(arg);
554 if (!dir) {
555 fprintf(stderr, "*** Failed to process directory: %s\n", arg);
556 return -1;
557 }
558
559 entry = readdir(dir);
560 while (entry) {
561 if (entry->d_name[0] == '.') {
562 entry = readdir(dir);
563 continue;
564 }
565
566 res = Translate(entry->d_name);
567 if (res) {
568 if (strcmp(res, entry->d_name)) {
569 if (ask) {
570 printf("Rename \"%s\" to \"%s\" (y/[n]) ", entry->d_name, res);
571 scanf("%c", &answer);
572 if (answer != '\n') fgets(stmp, 255, stdin);
573 answer = ((answer=='y')||(answer=='Y'))?1:0;
574 } else {
575 answer = 1;
576 }
577
578 if (answer) {
579 fn = Fullname(arg, entry->d_name);
580 nfn = Fullname(arg, res);
581 if ((fn)&&(nfn)) {
582 if (!lstat(nfn, &st)) {
583 if (!ask) {
584 printf("Trying rename \"%s\" to \"%s\"\n", entry->d_name, res);
585 }
586
587 if (S_ISDIR(st.st_mode)) {
588 printf("*** Directory with that name exists, skipping\n");
589 answer = 0;
590 } else {
591 printf("*** File exists, overwrite (y/[n]) ");
592 scanf("%c", &answer);
593 if (answer != '\n') fgets(stmp, 255, stdin);
594 answer = ((answer=='y')||(answer=='Y'))?1:0;
595 }
596 }
597 if (answer) {
598 err = rename(fn, nfn);
599 }
600 } else err = ENOMEM;
601
602 if (fn) free(fn);
603 if (nfn) free(nfn);
604
605 if (err) {
606 printf("*** Renaming \"%s\" to \"%s\" is failed (errno: %u)\n", entry->d_name, res, errno);
607 } else if (!ask) {
608 printf("Rename completed: \"%s\" to \"%s\"\n", entry->d_name, res);
609 }
610 }
611 }
612 free(res);
613 }
614 entry = readdir(dir);
615 }
616 closedir(dir);
617
618 if (process_subdirs) {
619 dir = opendir(arg);
620 if (!dir) return 0;
621
622 entry = readdir(dir);
623 while (entry) {
624 if (entry->d_name[0] == '.') {
625 entry = readdir(dir);
626 continue;
627 }
628
629 fn = Fullname(arg, entry->d_name);
630 if (fn) {
631 if ((!lstat(fn, &st))&&((S_ISDIR(st.st_mode)))) {
632 Directory(fn);
633 }
634 free(fn);
635 }
636 entry = readdir(dir);
637 }
638 closedir(dir);
639 }
640
641
642 return 0;
643 }
644
Translate(const char * source)645 char *Translate(const char *source) {
646 rcc_string rccstring;
647 char *recoded, *stmp;
648
649 if (strlen(source)<2) return NULL;
650
651 if (source_language_id != target_language_id) {
652 rccSetLanguage(NULL, source_language_id);
653 }
654
655 if (efrom) rccstring = rccFromCharset(NULL, efrom, source);
656 else rccstring = rccFrom(NULL, source_class_id, source);
657
658 if (!rccstring) return NULL;
659
660 if (source_language_id != target_language_id)
661 rccSetLanguage(NULL, target_language_id);
662
663 if (eto) {
664 if (translate = RCC_OPTION_TRANSLATE_OFF) {
665 stmp = rccTo(NULL, target_class_id, rccstring);
666 if (stmp) {
667 recoded = rccRecodeCharsets(NULL, "UTF-8", eto, stmp);
668 if (recoded) free(stmp);
669 else recoded = stmp;
670 } else recoded = NULL;
671
672 } else {
673 recoded = rccToCharset(NULL, eto, rccstring);
674 }
675 } else recoded = rccTo(NULL, target_class_id, rccstring);
676
677 free(rccstring);
678 return recoded;
679 }
680
681