1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "texprint.h"
22 #include "libset.h"
23 #include "gretl_string_table.h"
24
25 #include <glib.h>
26
27 #ifdef ENABLE_NLS
28
29 static int numeric_c_locale_depth = 0;
30 static char *numeric_locale = NULL;
31 static int native_dot = -1;
32
33 /**
34 * gretl_push_c_numeric_locale:
35 *
36 * Description: Saves the current %LC_NUMERIC locale and sets it to "C".
37 * This way you can safely read/write floating point numbers all in the
38 * same format, using '.' as the decimal character. You should make sure
39 * that code between gretl_push_c_numeric_locale() and gretl_pop_c_numeric_locale()
40 * doesn't do any setlocale calls, or locale may end up in a strange setting.
41 * Also make sure to always pop the C numeric locale after you've pushed it.
42 * The calls can be nested.
43 **/
44
gretl_push_c_numeric_locale(void)45 void gretl_push_c_numeric_locale (void)
46 {
47 if (native_dot == -1) {
48 struct lconv *lc = localeconv();
49
50 native_dot = (*lc->decimal_point == '.');
51 }
52
53 if (native_dot == 1) {
54 return;
55 }
56
57 if (numeric_c_locale_depth == 0) {
58 free(numeric_locale);
59 numeric_locale = gretl_strdup(setlocale(LC_NUMERIC, NULL));
60 setlocale(LC_NUMERIC, "C");
61 }
62
63 numeric_c_locale_depth++;
64 }
65
66 /**
67 * gretl_pop_c_numeric_locale:
68 *
69 * Description: Restores the LC_NUMERIC locale to what it was
70 * before the matching gretl_push_c_numeric_locale(). If these calls
 * were nested, then this is a no-op until we get to the outermost
72 * layer. Code in between these should not do any setlocale calls
73 * to change the %LC_NUMERIC locale or things may come out very strange.
74 **/
75
gretl_pop_c_numeric_locale(void)76 void gretl_pop_c_numeric_locale (void)
77 {
78 if (numeric_c_locale_depth == 0) {
79 return;
80 }
81
82 numeric_c_locale_depth--;
83
84 if (numeric_c_locale_depth == 0 && numeric_locale != NULL) {
85 setlocale(LC_NUMERIC, numeric_locale);
86 free(numeric_locale);
87 numeric_locale = NULL;
88 }
89 }
90
91 /**
92 * doing_nls:
93 *
94 * Returns: 1 if NLS translation is in effect, 0 otherwise.
95 */
96
int doing_nls (void)
{
    static int checked = 0;
    static int active = 0;

    if (checked) {
        return active;
    }

    /* translation is deemed active if any one of these probe
       strings comes back different from its English original */
    active = (strcmp("_Open data", _("_Open data")) != 0 ||
              strcmp("Test statistic", _("Test statistic")) != 0 ||
              strcmp("annual", _("annual")) != 0);
    checked = 1;

    return active;
}
110
111 static int decpoint;
112
113 /**
114 * reset_local_decpoint:
115 *
116 * Uses localeconv() to determine the representation of the decimal
117 * point in the current locale.
118 *
119 * Returns: the decimal character for the current locale.
120 */
121
reset_local_decpoint(void)122 int reset_local_decpoint (void)
123 {
124 struct lconv *lc = localeconv();
125
126 if (lc == NULL) {
127 fputs("localeconv() gave NULL!\n", stderr);
128 decpoint = '.';
129 } else if (lc->decimal_point == NULL) {
130 fputs("lc->decimal_point is NULL!\n", stderr);
131 decpoint = '.';
132 } else {
133 decpoint = *lc->decimal_point;
134 }
135
136 set_atof_point(decpoint);
137
138 #ifdef OS_OSX
139 fprintf(stderr, "via localeconv, decimal = '%c'\n", decpoint);
140 #endif
141
142 return decpoint;
143 }
144
145 /**
146 * get_local_decpoint:
147 *
148 * Returns: the decimal character for the current locale.
149 */
150
get_local_decpoint(void)151 int get_local_decpoint (void)
152 {
153 if (decpoint == 0) {
154 decpoint = reset_local_decpoint();
155 }
156 return decpoint;
157 }
158
159 #endif /* end of one NLS-only block */
160
/* Returns 1 if the name of the current locale starts with "zh",
   otherwise 0 (always 0 if neither WIN32 nor ENABLE_NLS is defined).
*/

int chinese_locale (void)
{
#ifdef WIN32
    gchar *name = g_win32_getlocale();
    int is_zh = (name != NULL && strncmp(name, "zh", 2) == 0);

    g_free(name);
    return is_zh;
#elif defined(ENABLE_NLS)
    const char *name = setlocale(LC_ALL, NULL);

    return (name != NULL && strncmp(name, "zh", 2) == 0);
#else
    return 0;
#endif
}
178
/* Returns 1 if the name of the current locale starts with "ja",
   otherwise 0 (always 0 if neither WIN32 nor ENABLE_NLS is defined).
*/

int japanese_locale (void)
{
#ifdef WIN32
    gchar *name = g_win32_getlocale();
    int is_ja = (name != NULL && strncmp(name, "ja", 2) == 0);

    g_free(name);
    return is_ja;
#elif defined(ENABLE_NLS)
    const char *name = setlocale(LC_ALL, NULL);

    return (name != NULL && strncmp(name, "ja", 2) == 0);
#else
    return 0;
#endif
}
196
/* Returns 1 if the name of the current locale starts with either
   "zh" or "ja", otherwise 0 (always 0 if neither WIN32 nor
   ENABLE_NLS is defined).
*/

int east_asian_locale (void)
{
#ifdef WIN32
    gchar *name = g_win32_getlocale();
    int hit = 0;

    if (name != NULL) {
        hit = (strncmp(name, "zh", 2) == 0 ||
               strncmp(name, "ja", 2) == 0);
    }
    g_free(name);
    return hit;
#elif defined(ENABLE_NLS)
    const char *name = setlocale(LC_ALL, NULL);

    if (name == NULL) {
        return 0;
    }
    return (strncmp(name, "zh", 2) == 0 ||
            strncmp(name, "ja", 2) == 0);
#else
    return 0;
#endif
}
216
217 #ifdef WIN32
218
219 struct localeinfo {
220 int id;
221 const char *code;
222 };
223
224 /* the following are strings accepted by setlocale()
225 on win32 */
226
227 static struct localeinfo locales[] = {
228 { LANG_AUTO, NULL },
229 { LANG_C, "english.1252" },
230 { LANG_SQ, "albanian.1250" },
231 { LANG_EU, "basque.1252" },
232 { LANG_BG, "bulgarian.1251" },
233 { LANG_CA, "catalan.1252" },
234 { LANG_ZH_TW, "chinese-traditional.950" },
235 { LANG_ZH_CN, "chinese-simplified.936" },
236 { LANG_CS, "czech.1250" },
237 { LANG_FR, "french.1252" },
238 { LANG_GL, "galician.1252" },
239 { LANG_DE, "german.1252" },
240 { LANG_EL, "greek.1253" },
241 { LANG_IT, "italian.1252" },
242 { LANG_JA, "japanese.932" },
243 { LANG_PL, "polish.1250" },
244 { LANG_PT, "portuguese.1252" },
245 { LANG_PT_BR, "portuguese-brazilian.1252" },
246 { LANG_RO, "romanian.1250" },
247 { LANG_RU, "russian.1251" },
248 { LANG_ES, "spanish.1252" },
249 { LANG_TR, "turkish.1254" },
250 { LANG_UK, "ukrainian.1251" },
251 { LANG_MAX, NULL }
252 };
253
locale_code_from_id(int langid)254 const char *locale_code_from_id (int langid)
255 {
256 int i;
257
258 for (i=0; i<LANG_MAX; i++) {
259 if (langid == locales[i].id) {
260 return locales[i].code;
261 }
262 }
263
264 return NULL;
265 }
266
267 #endif /* WIN32 */
268
269 struct langinfo {
270 int id;
271 const char *name;
272 const char *code;
273 };
274
275 static struct langinfo langs[] = {
276 { LANG_AUTO, "Automatic", NULL },
277 { LANG_C, "English", "C" },
278 { LANG_SQ, "Albanian", "sq_AL" },
279 { LANG_EU, "Basque", "eu_ES" },
280 { LANG_BG, "Bulgarian", "bg_BG" },
281 { LANG_CA, "Catalan", "ca_ES" },
282 { LANG_ZH_TW, "Chinese (Taiwan)", "zh_TW" },
283 { LANG_ZH_CN, "Chinese (simplified)", "zh_CN" },
284 { LANG_CS, "Czech", "cs_CZ" },
285 { LANG_FR, "French", "fr_FR" },
286 { LANG_GL, "Galician", "gl_ES" },
287 { LANG_DE, "German", "de_DE" },
288 { LANG_EL, "Greek", "el_GR" },
289 { LANG_IT, "Italian", "it_IT" },
290 { LANG_JA, "Japanese", "ja_JP" },
291 { LANG_PL, "Polish", "pl_PL" },
292 { LANG_PT, "Portuguese", "pt_PT" },
293 { LANG_PT_BR, "Portuguese (Brazil)", "pt_BR" },
294 { LANG_RO, "Romanian", "ro_RO" },
295 { LANG_RU, "Russian", "ru_RU" },
296 { LANG_ES, "Spanish", "es_ES" },
297 { LANG_TR, "Turkish", "tr_TR" },
298 { LANG_UK, "Ukrainian", "uk_UA" },
299 { LANG_MAX, NULL, NULL }
300 };
301
lang_string_from_id(int langid)302 const char *lang_string_from_id (int langid)
303 {
304 int i;
305
306 for (i=0; i<LANG_MAX; i++) {
307 if (langid == langs[i].id) {
308 return langs[i].name;
309 }
310 }
311
312 return NULL;
313 }
314
lang_id_from_name(const char * s)315 int lang_id_from_name (const char *s)
316 {
317 if (s != NULL || *s != '\0') {
318 int i;
319
320 for (i=0; i<LANG_MAX; i++) {
321 if (!strcmp(s, langs[i].name)) {
322 return langs[i].id;
323 }
324 }
325 }
326
327 return 0;
328 }
329
lang_code_from_id(int langid)330 const char *lang_code_from_id (int langid)
331 {
332 int i;
333
334 for (i=0; i<LANG_MAX; i++) {
335 if (langid == langs[i].id) {
336 return langs[i].code;
337 }
338 }
339
340 return NULL;
341 }
342
343 #ifdef WIN32
344
win32_set_numeric(const char * lang)345 static char *win32_set_numeric (const char *lang)
346 {
347 char *set = NULL;
348 int i;
349
350 for (i=LANG_SQ; i<LANG_MAX; i++) {
351 if (!strcmp(lang, langs[i].code) ||
352 !strncmp(lang, langs[i].code, 2)) {
353 set = setlocale(LC_NUMERIC, locales[i].code);
354 if (set != NULL) {
355 break;
356 }
357 }
358 }
359
360 return set;
361 }
362
363 #else /* !WIN32 */
364
365 # ifdef ENABLE_NLS
366
/* Try to set LC_NUMERIC to @lang; if that fails, retry with
   ".UTF-8" appended. Returns the value returned by setlocale(),
   which is NULL if neither attempt succeeded.
*/

static char *other_set_numeric (const char *lang)
{
    char *set = setlocale(LC_NUMERIC, lang);

    if (set == NULL && lang != NULL) {
        char lfix[64];
        int n;

        /* @lang may come from the environment, so the write must be
           bounded; a name too long to fit is not tried at all */
        n = snprintf(lfix, sizeof lfix, "%s.UTF-8", lang);
        if (n > 0 && (size_t) n < sizeof lfix) {
            set = setlocale(LC_NUMERIC, lfix);
        }
    }

    return set;
}
380
381 # endif /* ENABLE_NLS */
382
383 #endif /* WIN32 or not */
384
385 #ifdef ENABLE_NLS
386
387 /* more functions conditional on NLS enabled */
388
set_lcnumeric(int langid,int lcnumeric)389 void set_lcnumeric (int langid, int lcnumeric)
390 {
391 if (!lcnumeric || langid == LANG_C) {
392 setlocale(LC_NUMERIC, "C");
393 gretl_setenv("LC_NUMERIC", "C");
394 } else {
395 /* lcnumeric is selected and we're not in LANG_C */
396 const char *lang;
397 char *set = NULL;
398
399 if (langid == LANG_AUTO) {
400 /* respect the system LANG setting */
401 lang = getenv("LANG");
402 } else {
403 /* fake it from user preference */
404 lang = lang_code_from_id(langid);
405 }
406
407 if (lang != NULL) {
408 # ifdef WIN32
409 set = win32_set_numeric(lang);
410 # else
411 set = other_set_numeric(lang);
412 # endif
413 }
414 if (set == NULL) {
415 setlocale(LC_NUMERIC, "");
416 gretl_setenv("LC_NUMERIC", "");
417 }
418 }
419
420 reset_local_decpoint();
421 }
422
/* Try to set LC_ALL to @lcode; on non-Windows systems a second
   attempt is made with ".UTF-8" appended. On success, if the probe
   string "_File" still comes back untranslated, the LANGUAGE
   environment variable is set to the first word of the English
   name for @langid, in lower case. If @locp is non-NULL and the
   locale was set, a newly allocated copy of the setlocale() result
   is written to it. Returns 0 on success, 1 on failure.
*/

static int
set_locale_with_workaround (int langid, const char *lcode,
                            char **locp)
{
    char *test = setlocale(LC_ALL, lcode);

# ifndef WIN32
    if (test == NULL && lcode != NULL) {
        char lfix[64];
        int n;

        /* bounded write: skip the retry if the name won't fit */
        n = snprintf(lfix, sizeof lfix, "%s.UTF-8", lcode);
        if (n > 0 && (size_t) n < sizeof lfix) {
            test = setlocale(LC_ALL, lfix);
        }
    }
# endif

    if (test != NULL) {
        fprintf(stderr, "setlocale: '%s' -> '%s'\n",
                lcode != NULL ? lcode : "(null)", test);
        if (strcmp("_File", _("_File")) == 0) {
            const char *langstr;
            char tmp[64];

            langstr = lang_string_from_id(langid);
            /* the lookup can fail, and the read into tmp is bounded */
            if (langstr != NULL && sscanf(langstr, "%63s", tmp) == 1) {
                gretl_lower(tmp);
                gretl_setenv("LANGUAGE", tmp);
            }
        }
    }

    if (locp != NULL && test != NULL) {
        *locp = gretl_strdup(test);
    }

    return test == NULL;
}
457
/* the string to pass to setlocale() for language ID @i comes
   from a platform-specific lookup function */
# if defined(WIN32)
#  define get_setlocale_string(i) (locale_code_from_id(i))
# else
#  define get_setlocale_string(i) (lang_code_from_id(i))
# endif
463
464 /* @langstr should be the English name of the selected language
465 as displayed in the GUI (e.g. "German", "French")
466 */
467
int test_locale (const char *langstr)
{
    const char *lcode;
    const char *orig;
    char *saved = NULL;
    int langid, err = 0;

    langid = lang_id_from_name(langstr);
    lcode = get_setlocale_string(langid);

    /* Take a full copy of the current locale string: the pointer
       returned by setlocale() may be invalidated by later calls,
       and the string for LC_ALL can be long when the categories
       differ, so a small fixed buffer could truncate it and make
       the restore below fail.
    */
    orig = setlocale(LC_ALL, NULL);
    if (orig != NULL) {
        saved = gretl_strdup(orig);
    }

    gretl_error_clear();

    err = set_locale_with_workaround(langid, lcode, NULL);

    if (err) {
        gretl_errmsg_sprintf(_("%s: locale is not supported "
                               "on this system"),
                             lcode != NULL ? lcode : "(null)");
    } else if (saved != NULL) {
        setlocale(LC_ALL, saved); /* restore the original locale */
    }

    free(saved);

    return err;
}
494
/* Record the current language under the built-in string "lang".
   On Windows the value comes from g_win32_getlocale() (consulted
   only if @locale is non-NULL). Elsewhere the LANG environment
   variable is preferred, with @locale as fallback; in both cases
   anything from the last '.' onward is dropped. If no value can be
   determined, "unknown" is recorded. @locale is not modified.
*/

static void record_locale (char *locale)
{
    int done = 0;

# ifdef WIN32
    /* LANG probably not present, use setlocale output */
    if (locale != NULL) {
        gchar *s = g_win32_getlocale();

        if (s != NULL) {
            fprintf(stderr, "record_locale: got '%s'\n", s);
            gretl_insert_builtin_string("lang", s);
            g_free(s);
            done = 1;
        }
    }
# else
    char *src = getenv("LANG");

    if (src == NULL) {
        /* use locale as fallback */
        src = locale;
    }

    if (src != NULL) {
        char *dot = strrchr(src, '.');

        if (dot == NULL) {
            gretl_insert_builtin_string("lang", src);
            done = 1;
        } else {
            /* strip the trailing ".xxx" part, working on a copy so
               that neither the environment nor the caller's string
               is altered */
            char *tmp = gretl_strdup(src);

            if (tmp != NULL) {
                tmp[dot - src] = '\0';
                gretl_insert_builtin_string("lang", tmp);
                free(tmp);
                done = 1;
            }
        }
    }
# endif

    if (!done) {
        gretl_insert_builtin_string("lang", "unknown");
    }
}
545
force_language(int langid)546 int force_language (int langid)
547 {
548 const char *lcode = NULL;
549 char *locale = NULL;
550 int err = 0;
551
552 if (langid == LANG_AUTO) {
553 /* note: avoid getting long spew from Windows */
554 locale = gretl_strdup(setlocale(LC_COLLATE, NULL));
555 goto record;
556 }
557
558 if (langid == LANG_C) {
559 gretl_setenv("LANGUAGE", "english");
560 gretl_setenv("LANG", "C");
561 # ifdef WIN32
562 /* ensure we get an appropriate code page set */
563 setlocale(LC_ALL, "english.1252");
564 # else
565 setlocale(LC_ALL, "C");
566 #endif
567 } else {
568 /* setting a specific language other than English */
569 lcode = get_setlocale_string(langid);
570 if (lcode != NULL) {
571 # ifdef WIN32
572 locale = gretl_strdup(setlocale(LC_ALL, lcode));
573 fprintf(stderr, "lcode='%s' -> locale='%s'\n", lcode, locale);
574 if (locale == NULL) {
575 err = 1;
576 }
577 # else
578 err = set_locale_with_workaround(langid, lcode, &locale);
579 # endif
580 }
581 }
582
583 # if defined(WIN32)
584 if (langid == LANG_C) {
585 gretl_setenv("LC_ALL", "C");
586 textdomain("none");
587 } else if (lcode != NULL) {
588 lcode = lang_code_from_id(langid);
589 if (lcode != NULL) {
590 gretl_setenv("LC_ALL", lcode);
591 gretl_setenv("LANG", lcode);
592 }
593 }
594 # else /* elif defined(OS_OSX) */
595 if (langid != LANG_C) {
596 lcode = lang_code_from_id(langid);
597 if (lcode != NULL) {
598 gretl_setenv("LANGUAGE", lcode);
599 gretl_setenv("LANG", lcode);
600 }
601 }
602 # endif
603
604 record:
605
606 record_locale(locale);
607 free(locale);
608
609 return err;
610 }
611
612 #else /* !ENABLE_NLS */
613
614 /* stubs for NLS-disabled case */
615
void set_lcnumeric (int langid, int lcnumeric)
{
    /* NLS disabled: nothing to do */
    (void) langid;
    (void) lcnumeric;
}
620
int test_locale (const char *langstr)
{
    /* NLS disabled: always reports failure */
    (void) langstr;

    return 1;
}
625
int force_language (int langid)
{
    /* NLS disabled: always reports failure */
    (void) langid;

    return 1;
}
630
void gretl_push_c_numeric_locale (void)
{
    /* NLS disabled: no-op */
}
635
void gretl_pop_c_numeric_locale (void)
{
    /* NLS disabled: no-op */
}
640
int doing_nls (void)
{
    /* NLS disabled: translation is never in effect */
    const int active = 0;

    return active;
}
645
int reset_local_decpoint (void)
{
    /* NLS disabled: the decimal character is always '.' */
    const int dp = '.';

    return dp;
}
650
int get_local_decpoint (void)
{
    /* NLS disabled: the decimal character is always '.' */
    const int dp = '.';

    return dp;
}
655
656 #endif /* non-NLS stubs */
657
/* ASCII stand-ins for the Latin-1 bytes 192 to 255; a zero entry
   means the byte has no stand-in and is dropped */

static const char latin1_upper_half[64] = {
    /* 192-199 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C',
    /* 200-207 */ 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
    /* 208-215 */ 'D', 'N', 'O', 'O', 'O', 'O', 'O',  0,
    /* 216-223 */ 'O', 'U', 'U', 'U', 'U', 'Y',  0,   0,
    /* 224-231 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
    /* 232-239 */ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
    /* 240-247 */ 'd', 'n', 'o', 'o', 'o', 'o', 'o',  0,
    /* 248-255 */ 'o', 'u', 'u', 'u', 'u', 'y',  0,   0
};

/* tab, newline and the printable ASCII range are passed through
   unchanged by both translations */

static int ascii_passthrough (unsigned char c)
{
    return c == '\t' || c == '\n' || (c >= 32 && c <= 126);
}

/* ASCII replacement for the Latin-1 byte @c, or 0 to drop it */

static char latin1_ascii_equiv (unsigned char c)
{
    if (ascii_passthrough(c)) {
        return c;
    }
    if (c >= 192) {
        return latin1_upper_half[c - 192];
    }
    return 0;
}

/* ASCII replacement for the Latin-2 byte @c, or 0 to drop it;
   some of the byte values differ in the WIN32 build */

static char latin2_ascii_equiv (unsigned char c)
{
    if (ascii_passthrough(c)) {
        return c;
    }

    switch (c) {
#ifndef WIN32
    case 161:
#else
    case 165:
#endif
    case 193: case 194: case 195: case 196:
        return 'A';
    case 198: case 199: case 200:
        return 'C';
    case 207: case 208:
        return 'D';
    case 201: case 202: case 203: case 204:
        return 'E';
    case 205: case 206:
        return 'I';
#ifndef WIN32
    case 165:
#else
    case 188:
#endif
    case 163: case 197:
        return 'L';
    case 209: case 210:
        return 'N';
    case 211: case 212: case 213: case 214:
        return 'O';
    case 192: case 216:
        return 'R';
#ifndef WIN32
    case 166: case 169:
#else
    case 138: case 140:
#endif
    case 170:
        return 'S';
#ifndef WIN32
    case 171:
#else
    case 141:
#endif
    case 222:
        return 'T';
    case 217: case 218: case 219: case 220:
        return 'U';
    case 221:
        return 'Y';
#ifndef WIN32
    case 172: case 174:
#else
    case 142: case 143:
#endif
    case 175:
        return 'Z';
#ifndef WIN32
    case 177:
#else
    case 185:
#endif
    case 225: case 226: case 227: case 228:
        return 'a';
    case 230: case 231: case 232:
        return 'c';
    case 239: case 240:
        return 'd';
    case 233: case 234: case 235: case 236:
        return 'e';
    case 237: case 238:
        return 'i';
#ifndef WIN32
    case 181:
#else
    case 190:
#endif
    case 179: case 229:
        return 'l';
    case 241: case 242:
        return 'n';
    case 243: case 244: case 245: case 246:
        return 'o';
    case 224: case 248:
        return 'r';
#ifndef WIN32
    case 182: case 185:
#else
    case 154: case 156:
#endif
    case 186: case 223:
        return 's';
#ifndef WIN32
    case 187:
#else
    case 157:
#endif
    case 254:
        return 't';
    case 249: case 250: case 251: case 252:
        return 'u';
    case 253:
        return 'y';
#ifndef WIN32
    case 188: case 190:
#else
    case 158: case 159:
#endif
    case 191:
        return 'z';
    default:
        return 0;
    }
}

/* Write to @targ an ASCII rendering of the single-byte-encoded
   string @src: @latin = 1 selects the Latin-1 mapping, @latin = 2
   the Latin-2 mapping, and any other value produces an empty
   string. Bytes with no ASCII stand-in are dropped, so the output
   is never longer than the input.
*/

static void
iso_to_ascii_translate (char *targ, const char *src, int latin)
{
    const unsigned char *in = (const unsigned char *) src;
    char *out = targ;

    if (latin == 1 || latin == 2) {
        for (; *in != '\0'; in++) {
            char a = (latin == 1) ? latin1_ascii_equiv(*in) :
                latin2_ascii_equiv(*in);

            if (a != '\0') {
                *out++ = a;
            }
        }
    }

    *out = '\0';
}
874
875 /* If @maxlen > 0 we limit the write to @targ to at most
876 @maxlen bytes (excluding the terminating nul byte).
877 If @sub > 0 we write this byte to @targ in place of
878 UTF-8 characters that we can't represent in ASCII,
879 provided they are lower than 0x0180.
880 */
881
u8_to_ascii_convert(char * targ,const char * src,int maxlen,char sub)882 char *u8_to_ascii_convert (char *targ, const char *src,
883 int maxlen, char sub)
884 {
885 int prevspace = 0;
886 const char *q = src;
887 char *p = targ;
888 gunichar u;
889 int c, skip;
890 int len = 0;
891
892 *p = '\0';
893
894 /* If sub == 0 we assume we're doing varnames and
895 so we skip all characters that are not valid in
896 a gretl varname. But if sub > 0 we pass through
897 all printable ASCII characters.
898 */
899
900 while (q && *q) {
901 skip = 0;
902 c = *q;
903 if (sub > 0 && ((c >= 32 && c <= 126) || c == 9 || c == 10)) {
904 /* ASCII printables */
905 *p++ = c;
906 q++;
907 } else if (c >= 0x0030 && c <= 0x0039) {
908 /* digits 0-9 */
909 *p++ = c;
910 q++;
911 } else if (c >= 0x0041 && c <= 0x005A) {
912 /* upper-case ASCII letters */
913 *p++ = c;
914 q++;
915 } else if (c >= 0x0061 && c <= 0x007A) {
916 /* lower-case ASCII letters */
917 *p++ = c;
918 q++;
919 } else if (c == 0x005F) {
920 /* underscore */
921 *p++ = c;
922 q++;
923 } else if (c == 0x0020) {
924 if (!prevspace) {
925 prevspace = 1;
926 *p++ = '_';
927 } else {
928 skip = 1;
929 }
930 q++;
931 } else {
932 /* handle Latin-1 and Latin-2, only */
933 u = g_utf8_get_char(q);
934 if (u >= 0x0180) {
935 skip = 1; /* can't handle */
936 } else if ((u >= 0x00C0 && u <= 0x00C6) || u == 0x0102 || u == 0x0104) {
937 *p++ = 'A';
938 } else if (u == 0x00C7 || u == 0x0106 || u == 0x010C) {
939 *p++ = 'C';
940 } else if ((u >= 0x00C8 && u <= 0x00CB) || u == 0x0118 || u == 0x011A) {
941 *p++ = 'E';
942 } else if (u >= 0x00CC && u <= 0x00CF) {
943 *p++ = 'I';
944 } else if (u == 0x00D0 || u == 0x010E || u == 0x0110 || u == 0x010E) {
945 *p++ = 'D';
946 } else if (u == 0x00D1 || u == 0x0143 || u == 0x0147) {
947 *p++ = 'N';
948 } else if (u == 0x00D8 || (u >= 0x00D2 && u <= 0x00D6) || u == 0x0150) {
949 *p++ = 'O';
950 } else if ((u >= 0x00D9 && u <= 0x00DC) || u == 0x016E || u == 0x0170) {
951 *p++ = 'U';
952 } else if (u == 0x00DD) {
953 *p++ = 'Y';
954 } else if (u == 0x00DE || u == 0x0164) {
955 *p++ = 'T';
956 } else if (u == 0x00DF) {
957 *p++ = 's';
958 } else if ((u >= 0x00E0 && u <= 0x00E6) || u == 0x0103) {
959 *p++ = 'a';
960 } else if (u == 0x00E7 || u == 0x0107) {
961 *p++ = 'c';
962 } else if ((u >= 0x00E8 && u <= 0x00EB) || u == 0x0119 || u == 0x011B) {
963 *p++ = 'e';
964 } else if (u >= 0x00EC && u <= 0x00EF) {
965 *p++ = 'i';
966 } else if (u == 0x00F0 || u == 0x0111 || u == 0x010F) {
967 *p++ = 'd';
968 } else if (u == 0x00F1 || u == 0x0144 || u == 0x0148) {
969 *p++ = 'n';
970 } else if (u == 0x00F8 || u == 0x0151 || (u >= 0x00F2 && u <= 0x00F6)) {
971 *p++ = 'o';
972 } else if ((u >= 0x00F9 && u <= 0x00FC) || u == 0x016F || u == 0x0171) {
973 *p++ = 'u';
974 } else if (u == 0x00FD || u == 0x00FF) {
975 *p++ = 'y';
976 } else if (u == 0x00FE || u == 0x0163) {
977 *p++ = 't';
978 } else if (u == 0x0141 || u == 0x013D || u == 0x0139) {
979 *p++ = 'L';
980 } else if (u == 0x0142 || u == 0x013E || u == 0x013A) {
981 *p++ = 'l';
982 } else if (u == 0x0154 || u == 0x0158) {
983 *p++ = 'R';
984 } else if (u == 0x0155 || u == 0x0159) {
985 *p++ = 'r';
986 } else if (u == 0x0160 || u == 0x015E) {
987 *p++ = 'S';
988 } else if (u == 0x0161 || u == 0x015F) {
989 *p = 's';
990 } else if (u == 0x0179 || u == 0x017D || u == 0x0178) {
991 *p = 'Z';
992 } else if (u == 0x017A || u == 0x017E || u == 0x017C) {
993 *p = 'z';
994 } else if (sub > 0) {
995 *p = sub;
996 } else {
997 skip = 1;
998 }
999 q = g_utf8_next_char(q);
1000 }
1001 if (c != 0x0020) {
1002 prevspace = 0;
1003 }
1004 if (!skip) len++;
1005 if (maxlen > 0 && len == maxlen) {
1006 break;
1007 }
1008 }
1009
1010 *p = '\0';
1011
1012 return targ;
1013 }
1014
/* Convert @s in place to ASCII via iso_to_ascii_translate(), using
   a temporary buffer. Any @latin value other than 2 is treated as
   1. Returns @s, or NULL if the buffer could not be allocated (in
   which case @s is left untouched).
*/

static char *real_iso_to_ascii (char *s, int latin)
{
    char *buf = malloc(strlen(s) + 1);

    if (buf != NULL) {
        /* fall back to Latin-1 for unrecognized values */
        int which = (latin == 2) ? 2 : 1;

        iso_to_ascii_translate(buf, s, which);
        strcpy(s, buf);
        free(buf);

        return s;
    }

    return NULL;
}
1036
/* Convert @s in place to ASCII, treating it as Latin-1; returns
   whatever real_iso_to_ascii() returns.
*/

char *iso_to_ascii (char *s)
{
    const int latin = 1;

    return real_iso_to_ascii(s, latin);
}
1041
/* Write to @targ an ASCII rendering of @s, treating it as
   Latin-2, and return @targ. Note that @len is accepted but is
   not consulted here.
*/

char *sprint_l2_to_ascii (char *targ, const char *s, size_t len)
{
    (void) len;

    iso_to_ascii_translate(targ, s, 2);

    return targ;
}
1048
/* Replace the UTF-8 string @s, in place, with an ASCII version of
   at most 31 bytes produced by u8_to_ascii_convert() with no
   substitution byte. Returns @s, which is left unchanged if the
   temporary buffer cannot be allocated.
*/

char *asciify_utf8_varname (char *s)
{
    char *buf = malloc(32);

    if (buf == NULL) {
        return s;
    }

    u8_to_ascii_convert(buf, s, 31, 0);
    strcpy(s, buf);
    free(buf);

    return s;
}
1061
1062 /* Convert from UTF-8 text in @s to a form suitable for
1063 inclusion in RTF, where non-ASCII characters are
1064 recoded to escaped Unicode numbering. Return the
1065 converted text in a newly allocated string.
1066 */
1067
utf8_to_rtf(const char * s)1068 char *utf8_to_rtf (const char *s)
1069 {
1070 const char *nextp, *p = s;
1071 short int k;
1072 PRN *prn;
1073 char *ret = NULL;
1074 int err = 0;
1075
1076 prn = gretl_print_new(GRETL_PRINT_BUFFER, &err);
1077 if (prn == NULL) {
1078 return NULL;
1079 }
1080
1081 while (*p) {
1082 nextp = g_utf8_next_char(p);
1083 if (nextp - p > 1) {
1084 k = (short) g_utf8_get_char(p);
1085 pprintf(prn, "\\u%d?", k);
1086 } else {
1087 pputc(prn, *p);
1088 }
1089 p = nextp;
1090 }
1091
1092 ret = gretl_print_steal_buffer(prn);
1093 gretl_print_destroy(prn);
1094
1095 return ret;
1096 }
1097
1098 #define ascii_ctrl(a) (a == '\t' || a == '\n' || \
1099 a == '\r' || a == CTRLZ)
1100
gretl_is_ascii(const char * buf)1101 int gretl_is_ascii (const char *buf)
1102 {
1103 int a;
1104
1105 while (*buf) {
1106 a = *buf;
1107 if (a > 126 || (a < 32 && !(ascii_ctrl(a)))) {
1108 return 0;
1109 }
1110 buf++;
1111 }
1112
1113 return 1;
1114 }
1115
1116 /* We want to print @str in a field of @width (visible) characters,
1117 but @str may contain multi-byte characters. In that case, determine
1118 the adjustment to @width that is needed to avoid underrun and
1119 return the adjusted value.
1120 */
1121
get_utf_width(const char * str,int width)1122 int get_utf_width (const char *str, int width)
1123 {
1124 /* the number of "invisible" bytes */
1125 int invis = strlen(str) - g_utf8_strlen(str, -1);
1126
1127 return width + invis;
1128 }
1129
get_translated_width(const char * str)1130 int get_translated_width (const char *str)
1131 {
1132 int w = strlen(str);
1133
1134 w += w - g_utf8_strlen(str, -1);
1135
1136 return w;
1137 }
1138
1139 /* utility functionality: recoding of an entire file:
1140 we start with a couple of static "helpers"
1141 */
1142
file_get_content(const char * fname,gsize * bytes,PRN * prn,int * err)1143 static gchar *file_get_content (const char *fname,
1144 gsize *bytes,
1145 PRN *prn,
1146 int *err)
1147 {
1148 GError *gerr = NULL;
1149 gchar *buf = NULL;
1150 int ok;
1151
1152 ok = g_file_get_contents(fname, &buf, bytes, &gerr);
1153
1154 if (ok) {
1155 pprintf(prn, "got content, %" G_GSIZE_FORMAT " bytes\n", *bytes);
1156 } else {
1157 *err = E_FOPEN;
1158 if (gerr != NULL) {
1159 gretl_errmsg_set(gerr->message);
1160 g_error_free(gerr);
1161 }
1162 }
1163
1164 return buf;
1165 }
1166
/* Write @buflen bytes from @buf to file @fname via
   g_file_set_contents(). Returns 0 on success; on failure returns
   E_FOPEN after recording the GLib error message, if any.
*/

static int file_set_content (const char *fname,
                             const gchar *buf,
                             gsize buflen)
{
    GError *gerr = NULL;

    if (g_file_set_contents(fname, buf, buflen, &gerr)) {
        return 0;
    }

    if (gerr != NULL) {
        gretl_errmsg_set(gerr->message);
        g_error_free(gerr);
    }

    return E_FOPEN;
}
1186
glib_recode_buffer(const char * buf,const char * from_set,const char * to_set,gsize bytes,gsize * written,int * err)1187 static gchar *glib_recode_buffer (const char *buf,
1188 const char *from_set,
1189 const char *to_set,
1190 gsize bytes,
1191 gsize *written,
1192 int *err)
1193 {
1194 gchar *trbuf = NULL;
1195 GError *gerr = NULL;
1196 gsize got = 0;
1197
1198 trbuf = g_convert(buf, bytes, to_set, from_set,
1199 &got, written, &gerr);
1200
1201 if (gerr != NULL) {
1202 *err = E_DATA;
1203 gretl_errmsg_set(gerr->message);
1204 g_error_free(gerr);
1205 }
1206
1207 return trbuf;
1208 }
1209
1210 /**
1211 * gretl_recode_file:
1212 * @path1: path to original file.
1213 * @path2: path to file to be written.
1214 * @from_set: the codeset of the original file.
1215 * @to_set: the codeset for the recoded file.
1216 * @prn: gretl printer (for a few comments) or NULL.
1217 *
1218 * Returns: 0 on success or non-zero code on error.
1219 */
1220
int gretl_recode_file (const char *path1, const char *path2,
                       const char *from_set, const char *to_set,
                       PRN *prn)
{
    gchar *inbuf = NULL;
    gchar *outbuf = NULL;
    gsize nbytes = 0;
    gsize nwritten = 0;
    int err = 0;

    /* get entire content of original file */
    inbuf = file_get_content(path1, &nbytes, prn, &err);
    if (err) {
        g_free(inbuf);
        return err;
    }

    /* convert between the two codesets */
    outbuf = glib_recode_buffer(inbuf, from_set, to_set,
                                nbytes, &nwritten, &err);

    if (!err) {
        /* write recoded text to file */
        pprintf(prn, "recoded: %" G_GSIZE_FORMAT " bytes\n", nwritten);
        err = file_set_content(path2, outbuf, nwritten);
    }

    g_free(outbuf);
    g_free(inbuf);

    return err;
}
1249