1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include "libgretl.h"
21 #include "texprint.h"
22 #include "libset.h"
23 #include "gretl_string_table.h"
24 
25 #include <glib.h>
26 
27 #ifdef ENABLE_NLS
28 
/* Count of outstanding gretl_push_c_numeric_locale() calls that have
   not yet been matched by gretl_pop_c_numeric_locale() */
static int numeric_c_locale_depth = 0;
/* Heap copy of the LC_NUMERIC setting taken by the outermost push;
   restored, freed and reset to NULL by the outermost pop */
static char *numeric_locale = NULL;
/* -1 until the first push; after that 1 if the decimal point reported
   by localeconv() at that moment was '.', otherwise 0 */
static int native_dot = -1;
32 
33 /**
34  * gretl_push_c_numeric_locale:
35  *
36  * Description: Saves the current %LC_NUMERIC locale and sets it to "C".
37  * This way you can safely read/write floating point numbers all in the
38  * same format, using '.' as the decimal character.  You should make sure
39  * that code between gretl_push_c_numeric_locale() and gretl_pop_c_numeric_locale()
40  * doesn't do any setlocale calls, or locale may end up in a strange setting.
41  * Also make sure to always pop the C numeric locale after you've pushed it.
42  * The calls can be nested.
43  **/
44 
gretl_push_c_numeric_locale(void)45 void gretl_push_c_numeric_locale (void)
46 {
47     if (native_dot == -1) {
48 	struct lconv *lc = localeconv();
49 
50 	native_dot = (*lc->decimal_point == '.');
51     }
52 
53     if (native_dot == 1) {
54 	return;
55     }
56 
57     if (numeric_c_locale_depth == 0) {
58 	free(numeric_locale);
59 	numeric_locale = gretl_strdup(setlocale(LC_NUMERIC, NULL));
60 	setlocale(LC_NUMERIC, "C");
61     }
62 
63     numeric_c_locale_depth++;
64 }
65 
66 /**
67  * gretl_pop_c_numeric_locale:
68  *
69  * Description:  Restores the LC_NUMERIC locale to what it was
70  * before the matching gretl_push_c_numeric_locale(). If these calls
71  * were nested, then this is a no-op until we get to the most outermost
72  * layer. Code in between these should not do any setlocale calls
73  * to change the %LC_NUMERIC locale or things may come out very strange.
74  **/
75 
gretl_pop_c_numeric_locale(void)76 void gretl_pop_c_numeric_locale (void)
77 {
78     if (numeric_c_locale_depth == 0) {
79 	return;
80     }
81 
82     numeric_c_locale_depth--;
83 
84     if (numeric_c_locale_depth == 0 && numeric_locale != NULL) {
85 	setlocale(LC_NUMERIC, numeric_locale);
86 	free(numeric_locale);
87 	numeric_locale = NULL;
88     }
89 }
90 
/**
 * doing_nls:
 *
 * Decides whether message translation is active by checking whether
 * any of three probe strings comes back from the translation macro
 * different from its English original. The check is performed on
 * the first call only; the result is cached thereafter.
 *
 * Returns: 1 if NLS translation is in effect, 0 otherwise.
 */

int doing_nls (void)
{
    static int called, nls;

    if (called) {
	return nls;
    }

    /* probes are tried in order; the first one that differs settles it */
    if (strcmp("_Open data", _("_Open data")) != 0) {
	nls = 1;
    } else if (strcmp("Test statistic", _("Test statistic")) != 0) {
	nls = 1;
    } else if (strcmp("annual", _("annual")) != 0) {
	nls = 1;
    } else {
	nls = 0;
    }

    called = 1;

    return nls;
}
110 
/* Cached decimal character: 0 until reset_local_decpoint() has run
   (get_local_decpoint() treats 0 as "not yet determined") */
static int decpoint;
112 
113 /**
114  * reset_local_decpoint:
115  *
116  * Uses localeconv() to determine the representation of the decimal
117  * point in the current locale.
118  *
119  * Returns: the decimal character for the current locale.
120  */
121 
reset_local_decpoint(void)122 int reset_local_decpoint (void)
123 {
124     struct lconv *lc = localeconv();
125 
126     if (lc == NULL) {
127 	fputs("localeconv() gave NULL!\n", stderr);
128 	decpoint = '.';
129     } else if (lc->decimal_point == NULL) {
130 	fputs("lc->decimal_point is NULL!\n", stderr);
131 	decpoint = '.';
132     } else {
133 	decpoint = *lc->decimal_point;
134     }
135 
136     set_atof_point(decpoint);
137 
138 #ifdef OS_OSX
139     fprintf(stderr, "via localeconv, decimal = '%c'\n", decpoint);
140 #endif
141 
142     return decpoint;
143 }
144 
145 /**
146  * get_local_decpoint:
147  *
148  * Returns: the decimal character for the current locale.
149  */
150 
get_local_decpoint(void)151 int get_local_decpoint (void)
152 {
153     if (decpoint == 0) {
154 	decpoint = reset_local_decpoint();
155     }
156     return decpoint;
157 }
158 
159 #endif /* end of one NLS-only block */
160 
/* Returns 1 if the name of the current locale starts with "zh",
   otherwise 0. On Windows the name comes from g_win32_getlocale();
   elsewhere, when NLS is enabled, from setlocale(LC_ALL, NULL).
   With neither available the result is always 0.
*/

int chinese_locale (void)
{
#ifdef WIN32
    gchar *name = g_win32_getlocale();
    int is_zh = 0;

    if (name != NULL && strncmp(name, "zh", 2) == 0) {
	is_zh = 1;
    }
    g_free(name);

    return is_zh;
#elif defined(ENABLE_NLS)
    const char *name = setlocale(LC_ALL, NULL);

    if (name == NULL) {
	return 0;
    }

    return strncmp(name, "zh", 2) == 0;
#else
    return 0;
#endif
}
178 
/* Returns 1 if the name of the current locale starts with "ja",
   otherwise 0. On Windows the name comes from g_win32_getlocale();
   elsewhere, when NLS is enabled, from setlocale(LC_ALL, NULL).
   With neither available the result is always 0.
*/

int japanese_locale (void)
{
#ifdef WIN32
    gchar *name = g_win32_getlocale();
    int is_ja = 0;

    if (name != NULL && strncmp(name, "ja", 2) == 0) {
	is_ja = 1;
    }
    g_free(name);

    return is_ja;
#elif defined(ENABLE_NLS)
    const char *name = setlocale(LC_ALL, NULL);

    if (name == NULL) {
	return 0;
    }

    return strncmp(name, "ja", 2) == 0;
#else
    return 0;
#endif
}
196 
/* Returns 1 if the name of the current locale starts with "zh" or
   "ja", otherwise 0. On Windows the name comes from
   g_win32_getlocale(); elsewhere, when NLS is enabled, from
   setlocale(LC_ALL, NULL). With neither available the result is
   always 0.
*/

int east_asian_locale (void)
{
#ifdef WIN32
    gchar *name = g_win32_getlocale();
    int hit = 0;

    if (name != NULL) {
	if (strncmp(name, "zh", 2) == 0) {
	    hit = 1;
	} else if (strncmp(name, "ja", 2) == 0) {
	    hit = 1;
	}
    }
    g_free(name);

    return hit;
#elif defined(ENABLE_NLS)
    const char *name = setlocale(LC_ALL, NULL);

    if (name == NULL) {
	return 0;
    }
    if (strncmp(name, "zh", 2) == 0) {
	return 1;
    }

    return strncmp(name, "ja", 2) == 0;
#else
    return 0;
#endif
}
216 
217 #ifdef WIN32
218 
/* Pairing of a gretl language ID with a Windows locale string */
struct localeinfo {
    int id;            /* one of the LANG_* identifiers */
    const char *code;  /* "language.codepage" string, or NULL */
};

/* the following are strings accepted by setlocale()
   on win32 */

/* Note: the rows here are in the same order as those of the langs[]
   table further down in this file; win32_set_numeric() uses a single
   index into both tables. The final LANG_MAX row is a terminator.
*/
static struct localeinfo locales[] = {
    { LANG_AUTO,  NULL },
    { LANG_C,     "english.1252" },
    { LANG_SQ,    "albanian.1250" },
    { LANG_EU,    "basque.1252" },
    { LANG_BG,    "bulgarian.1251" },
    { LANG_CA,    "catalan.1252" },
    { LANG_ZH_TW, "chinese-traditional.950" },
    { LANG_ZH_CN, "chinese-simplified.936" },
    { LANG_CS,    "czech.1250" },
    { LANG_FR,    "french.1252" },
    { LANG_GL,    "galician.1252" },
    { LANG_DE,    "german.1252" },
    { LANG_EL,    "greek.1253" },
    { LANG_IT,    "italian.1252" },
    { LANG_JA,    "japanese.932" },
    { LANG_PL,    "polish.1250" },
    { LANG_PT,    "portuguese.1252" },
    { LANG_PT_BR, "portuguese-brazilian.1252" },
    { LANG_RO,    "romanian.1250" },
    { LANG_RU,    "russian.1251" },
    { LANG_ES,    "spanish.1252" },
    { LANG_TR,    "turkish.1254" },
    { LANG_UK,    "ukrainian.1251" },
    { LANG_MAX,    NULL }
};
253 
locale_code_from_id(int langid)254 const char *locale_code_from_id (int langid)
255 {
256     int i;
257 
258     for (i=0; i<LANG_MAX; i++) {
259 	if (langid == locales[i].id) {
260 	    return locales[i].code;
261 	}
262     }
263 
264     return NULL;
265 }
266 
267 #endif /* WIN32 */
268 
/* Description of one language selectable in gretl */
struct langinfo {
    int id;            /* one of the LANG_* identifiers */
    const char *name;  /* English name of the language */
    const char *code;  /* "ll_CC" style locale code, or NULL */
};

/* Table of known languages. The lookup functions below scan the
   first LANG_MAX rows; the final LANG_MAX row, with NULL name and
   code, is a terminator.
*/
static struct langinfo langs[] = {
    { LANG_AUTO,  "Automatic",            NULL    },
    { LANG_C,     "English",              "C"     },
    { LANG_SQ,    "Albanian",             "sq_AL" },
    { LANG_EU,    "Basque",               "eu_ES" },
    { LANG_BG,    "Bulgarian",            "bg_BG" },
    { LANG_CA,    "Catalan",              "ca_ES" },
    { LANG_ZH_TW, "Chinese (Taiwan)",     "zh_TW" },
    { LANG_ZH_CN, "Chinese (simplified)", "zh_CN" },
    { LANG_CS,    "Czech",                "cs_CZ" },
    { LANG_FR,    "French",               "fr_FR" },
    { LANG_GL,    "Galician",             "gl_ES" },
    { LANG_DE,    "German",               "de_DE" },
    { LANG_EL,    "Greek",                "el_GR" },
    { LANG_IT,    "Italian",              "it_IT" },
    { LANG_JA,    "Japanese",             "ja_JP" },
    { LANG_PL,    "Polish",               "pl_PL" },
    { LANG_PT,    "Portuguese",           "pt_PT" },
    { LANG_PT_BR, "Portuguese (Brazil)",  "pt_BR" },
    { LANG_RO,    "Romanian",             "ro_RO" },
    { LANG_RU,    "Russian",              "ru_RU" },
    { LANG_ES,    "Spanish",              "es_ES" },
    { LANG_TR,    "Turkish",              "tr_TR" },
    { LANG_UK,    "Ukrainian",            "uk_UA" },
    { LANG_MAX,    NULL,                   NULL   }
};
301 
lang_string_from_id(int langid)302 const char *lang_string_from_id (int langid)
303 {
304     int i;
305 
306     for (i=0; i<LANG_MAX; i++) {
307 	if (langid == langs[i].id) {
308 	    return langs[i].name;
309 	}
310     }
311 
312     return NULL;
313 }
314 
lang_id_from_name(const char * s)315 int lang_id_from_name (const char *s)
316 {
317     if (s != NULL || *s != '\0') {
318 	int i;
319 
320 	for (i=0; i<LANG_MAX; i++) {
321 	    if (!strcmp(s, langs[i].name)) {
322 		return langs[i].id;
323 	    }
324 	}
325     }
326 
327     return 0;
328 }
329 
lang_code_from_id(int langid)330 const char *lang_code_from_id (int langid)
331 {
332     int i;
333 
334     for (i=0; i<LANG_MAX; i++) {
335 	if (langid == langs[i].id) {
336 	    return langs[i].code;
337 	}
338     }
339 
340     return NULL;
341 }
342 
343 #ifdef WIN32
344 
win32_set_numeric(const char * lang)345 static char *win32_set_numeric (const char *lang)
346 {
347     char *set = NULL;
348     int i;
349 
350     for (i=LANG_SQ; i<LANG_MAX; i++) {
351 	if (!strcmp(lang, langs[i].code) ||
352 	    !strncmp(lang, langs[i].code, 2)) {
353 	    set = setlocale(LC_NUMERIC, locales[i].code);
354 	    if (set != NULL) {
355 		break;
356 	    }
357 	}
358     }
359 
360     return set;
361 }
362 
363 #else /* !WIN32 */
364 
365 # ifdef ENABLE_NLS
366 
/* Try to set LC_NUMERIC to @lang; if that fails, retry with
   ".UTF-8" appended. Returns the value from the setlocale() call
   that succeeded, or NULL if neither did.

   @lang may come from the environment, so the retry name is built
   with a bounded write: if it would not fit in the scratch buffer
   the retry is skipped rather than overrunning the buffer.
*/

static char *other_set_numeric (const char *lang)
{
    char *set = setlocale(LC_NUMERIC, lang);

    if (set == NULL && lang != NULL) {
	char lfix[32];
	int n = snprintf(lfix, sizeof lfix, "%s.UTF-8", lang);

	if (n > 0 && (size_t) n < sizeof lfix) {
	    /* the name fit without truncation */
	    set = setlocale(LC_NUMERIC, lfix);
	}
    }

    return set;
}
380 
381 # endif /* ENABLE_NLS */
382 
383 #endif /* WIN32 or not */
384 
385 #ifdef ENABLE_NLS
386 
387 /* more functions conditional on NLS enabled */
388 
set_lcnumeric(int langid,int lcnumeric)389 void set_lcnumeric (int langid, int lcnumeric)
390 {
391     if (!lcnumeric || langid == LANG_C) {
392 	setlocale(LC_NUMERIC, "C");
393 	gretl_setenv("LC_NUMERIC", "C");
394     } else {
395 	/* lcnumeric is selected and we're not in LANG_C */
396 	const char *lang;
397 	char *set = NULL;
398 
399 	if (langid == LANG_AUTO) {
400 	    /* respect the system LANG setting */
401 	    lang = getenv("LANG");
402 	} else {
403 	    /* fake it from user preference */
404 	    lang = lang_code_from_id(langid);
405 	}
406 
407 	if (lang != NULL) {
408 # ifdef WIN32
409 	    set = win32_set_numeric(lang);
410 # else
411 	    set = other_set_numeric(lang);
412 # endif
413 	}
414 	if (set == NULL) {
415 	    setlocale(LC_NUMERIC, "");
416 	    gretl_setenv("LC_NUMERIC", "");
417 	}
418     }
419 
420     reset_local_decpoint();
421 }
422 
/* Try to set LC_ALL to @lcode; on non-Windows systems, if that
   fails, retry with ".UTF-8" appended. On success the result is
   logged to stderr, and if the string "_File" still comes back
   untranslated the LANGUAGE environment variable is set to the
   lower-cased first word of the English language name for @langid.
   If @locp is non-NULL and the locale was set, a newly allocated
   copy of the setlocale() result is stored in *@locp (the caller
   should free it).

   All writes into the local buffers are bounded, and a NULL result
   from lang_string_from_id() is tolerated.

   Returns: 0 if setlocale() succeeded, 1 otherwise.
*/

static int
set_locale_with_workaround (int langid, const char *lcode,
			    char **locp)
{
    char *test = setlocale(LC_ALL, lcode);

# ifndef WIN32
    if (test == NULL && lcode != NULL) {
	char lfix[32];
	int n = snprintf(lfix, sizeof lfix, "%s.UTF-8", lcode);

	if (n > 0 && (size_t) n < sizeof lfix) {
	    /* the name fit without truncation */
	    test = setlocale(LC_ALL, lfix);
	}
    }
# endif

    if (test != NULL) {
	fprintf(stderr, "setlocale: '%s' -> '%s'\n",
		lcode != NULL ? lcode : "(null)", test);
	if (strcmp("_File", _("_File")) == 0) {
	    /* translation has not kicked in */
	    const char *langstr = lang_string_from_id(langid);
	    char tmp[64];

	    /* take the first word only, e.g. "Chinese (Taiwan)" -> "Chinese" */
	    if (langstr != NULL && sscanf(langstr, "%63s", tmp) == 1) {
		gretl_lower(tmp);
		gretl_setenv("LANGUAGE", tmp);
	    }
	}
    }

    if (locp != NULL && test != NULL) {
	*locp = gretl_strdup(test);
    }

    return test == NULL;
}
457 
/* get_setlocale_string(i): the string to pass to setlocale() for
   language ID i -- from the Windows locales table on WIN32, else
   the locale code from the langs table */
# ifdef WIN32
# define get_setlocale_string(i) (locale_code_from_id(i))
# else
# define get_setlocale_string(i) (lang_code_from_id(i))
# endif
463 
/* Check whether the locale for a given language can be set on this
   system. @langstr should be the English name of the selected
   language as displayed in the GUI (e.g. "German", "French").

   The current LC_ALL setting is saved beforehand and reinstated if
   the trial succeeds. The saved string is copied in full: the value
   reported for a composite locale (per-category settings) can be
   much longer than a short fixed buffer, and a truncated copy cannot
   be used to restore the original state.

   Returns: 0 if the locale could be set, non-zero otherwise (in
   which case an error message is recorded).
*/

int test_locale (const char *langstr)
{
    const char *lcode;
    const char *orig;
    char *saved = NULL;
    int langid, err = 0;

    langid = lang_id_from_name(langstr);
    lcode = get_setlocale_string(langid);

    /* copy right away: the returned string may be overwritten by
       subsequent setlocale() calls */
    orig = setlocale(LC_ALL, NULL);
    if (orig != NULL) {
	saved = gretl_strdup(orig);
    }

    gretl_error_clear();

    err = set_locale_with_workaround(langid, lcode, NULL);

    if (err) {
	gretl_errmsg_sprintf(_("%s: locale is not supported "
			       "on this system"), lcode);
    } else if (saved != NULL) {
	setlocale(LC_ALL, saved); /* restore the original locale */
    }

    free(saved);

    return err;
}
494 
/* Record the language in force as the built-in string "lang".

   On Windows, provided @locale is non-NULL, the value comes from
   g_win32_getlocale(). Elsewhere the LANG environment variable is
   preferred, with @locale as fallback; in either case anything from
   the last '.' onward (e.g. ".UTF-8") is dropped. Note that in the
   fallback case @locale itself is truncated in place. If no value
   can be determined "unknown" is recorded.

   The string is inserted exactly once on every path.
*/

static void record_locale (char *locale)
{
    int done = 0;

# ifdef WIN32
    /* LANG probably not present, use setlocale output */
    if (locale != NULL) {
	gchar *s = g_win32_getlocale();

	if (s != NULL) {
            fprintf(stderr, "record_locale: got '%s'\n", s);
	    gretl_insert_builtin_string("lang", s);
	    g_free(s);
	    done = 1;
	}
    }
# else
    char *lang = getenv("LANG");

    if (lang != NULL) {
	/* prefer using LANG */
	if (strrchr(lang, '.') == NULL) {
	    gretl_insert_builtin_string("lang", lang);
	    done = 1;
	} else {
	    /* work on a copy: the environment string is not ours to edit */
	    char *tmp = gretl_strdup(lang);

	    if (tmp != NULL) {
		char *p = strrchr(tmp, '.');

		*p = '\0';
		gretl_insert_builtin_string("lang", tmp);
		free(tmp);
		done = 1;
	    }
	}
    } else if (locale != NULL) {
	/* use locale as fallback */
	char *p = strrchr(locale, '.');

	if (p != NULL) {
	    *p = '\0';
	}
	gretl_insert_builtin_string("lang", locale);
	done = 1;
    }
# endif

    if (!done) {
	gretl_insert_builtin_string("lang", "unknown");
    }
}
545 
force_language(int langid)546 int force_language (int langid)
547 {
548     const char *lcode = NULL;
549     char *locale = NULL;
550     int err = 0;
551 
552     if (langid == LANG_AUTO) {
553 	/* note: avoid getting long spew from Windows */
554 	locale = gretl_strdup(setlocale(LC_COLLATE, NULL));
555 	goto record;
556     }
557 
558     if (langid == LANG_C) {
559 	gretl_setenv("LANGUAGE", "english");
560 	gretl_setenv("LANG", "C");
561 # ifdef WIN32
562 	/* ensure we get an appropriate code page set */
563 	setlocale(LC_ALL, "english.1252");
564 # else
565 	setlocale(LC_ALL, "C");
566 #endif
567     } else {
568 	/* setting a specific language other than English */
569 	lcode = get_setlocale_string(langid);
570 	if (lcode != NULL) {
571 # ifdef WIN32
572 	    locale = gretl_strdup(setlocale(LC_ALL, lcode));
573             fprintf(stderr, "lcode='%s' -> locale='%s'\n", lcode, locale);
574 	    if (locale == NULL) {
575 		err = 1;
576 	    }
577 # else
578 	    err = set_locale_with_workaround(langid, lcode, &locale);
579 # endif
580 	}
581     }
582 
583 # if defined(WIN32)
584     if (langid == LANG_C) {
585 	gretl_setenv("LC_ALL", "C");
586 	textdomain("none");
587     } else if (lcode != NULL) {
588         lcode = lang_code_from_id(langid);
589 	if (lcode != NULL) {
590 	    gretl_setenv("LC_ALL", lcode);
591 	    gretl_setenv("LANG", lcode);
592 	}
593     }
594 # else /* elif defined(OS_OSX) */
595     if (langid != LANG_C) {
596 	lcode = lang_code_from_id(langid);
597 	if (lcode != NULL) {
598 	    gretl_setenv("LANGUAGE", lcode);
599 	    gretl_setenv("LANG", lcode);
600 	}
601     }
602 # endif
603 
604  record:
605 
606     record_locale(locale);
607     free(locale);
608 
609     return err;
610 }
611 
612 #else /* !ENABLE_NLS */
613 
614 /* stubs for NLS-disabled case */
615 
/* NLS disabled: there is no numeric locale to select, so this
   does nothing */
void set_lcnumeric (int langid, int lcnumeric)
{
    (void) langid;
    (void) lcnumeric;
}
620 
/* NLS disabled: no locale can be tested, so always report
   failure (1) */
int test_locale (const char *langstr)
{
    (void) langstr;

    return 1;
}
625 
/* NLS disabled: no language can be put into force, so always
   report failure (1) */
int force_language (int langid)
{
    (void) langid;

    return 1;
}
630 
/* NLS disabled: the numeric locale is never switched, so there is
   nothing to push */
void gretl_push_c_numeric_locale (void)
{
}
635 
/* NLS disabled: the numeric locale is never switched, so there is
   nothing to pop */
void gretl_pop_c_numeric_locale (void)
{
}
640 
/* NLS disabled: translation is never in effect */
int doing_nls (void)
{
    const int nls = 0;

    return nls;
}
645 
/* NLS disabled: the decimal character is always '.' */
int reset_local_decpoint (void)
{
    const int dp = '.';

    return dp;
}
650 
/* NLS disabled: the decimal character is always '.' */
int get_local_decpoint (void)
{
    const int dp = '.';

    return dp;
}
655 
656 #endif /* non-NLS stubs */
657 
/* Write to @targ an ASCII approximation of the single-byte encoded
   text in @src.

   @latin selects the table of replacements: 1 or 2 (the names
   suggest ISO-8859-1 and ISO-8859-2 respectively -- TODO confirm).
   For any other value nothing is copied and @targ ends up as an
   empty string.

   Tab, newline and the bytes 32 to 126 are copied through
   unchanged; bytes that appear in the selected table are replaced
   by an unaccented ASCII letter; all other bytes are dropped.
   Each input byte yields at most one output byte, so @targ needs
   room for strlen(@src) + 1 bytes; no bounds check is done here.
*/

static void
iso_to_ascii_translate (char *targ, const char *src, int latin)
{
    char *p;
    const char *q;

    p = targ;
    q = src;

    if (latin == 1) {
	while (*q) {
	    /* examine the byte as unsigned so that values above 127
	       can be compared directly */
	    unsigned char c = *q;

	    if (c == '\t' || c == '\n' || (c >= 32 && c <= 126)) {
		*p++ = c;
	    } else if (c >= 192 && c <= 198) {
		*p++ = 'A';
	    } else if (c == 199) {
		*p++ = 'C';
	    } else if (c >= 200 && c <= 203) {
		*p++ = 'E';
	    } else if (c >= 204 && c <= 207) {
		*p++ = 'I';
	    } else if (c == 208) {
		*p++ = 'D';
	    } else if (c == 209) {
		*p++ = 'N';
	    } else if (c >= 210 && c <= 214) {
		*p++ = 'O';
	    } else if (c == 216) {
		*p++ = 'O';
	    } else if (c >= 217 && c <= 220) {
		*p++ = 'U';
	    } else if (c == 221) {
		*p++ = 'Y';
	    } else if (c >= 224 && c <= 230) {
		*p++ = 'a';
	    } else if (c == 231) {
		*p++ = 'c';
	    } else if (c >= 232 && c <= 235) {
		*p++ = 'e';
	    } else if (c >= 236 && c <= 239) {
		*p++ = 'i';
	    } else if (c == 240) {
		*p++ = 'd';
	    } else if (c == 241) {
		*p++ = 'n';
	    } else if (c >= 242 && c <= 246) {
		*p++ = 'o';
	    } else if (c == 248) {
		*p++ = 'o';
	    } else if (c >= 249 && c <= 252) {
		*p++ = 'u';
	    } else if (c == 253) {
		*p++ = 'y';
	    }
	    /* no final else: a byte matching none of the above is dropped */
	    q++;
	}
    } else if (latin == 2) {
	while (*q) {
	    unsigned char c = *q;

	    /* plain ASCII is handled by this stand-alone test; the
	       chain that follows only matches bytes above 126, so at
	       most one of the two writes a byte */
	    if (c == '\t' || c == '\n' || (c >= 32 && c <= 126)) {
		*p++ = c;
	    }

	    /* where the WIN32 and non-WIN32 variants differ, the byte
	       values for certain letters differ (presumably Windows
	       code page 1250 versus ISO-8859-2 -- TODO confirm) */
#ifndef WIN32
	    if (c==161 || c==193 || c==194 || c==195 || c==196) {
		*p++ = 'A';
	    }
#else
	    if (c==165 || c==193 || c==194 || c==195 || c==196) {
		*p++ = 'A';
	    }
#endif
	    else if (c==198 || c==199 || c==200) {
		*p++ = 'C';
	    }
	    else if (c==207 || c==208) {
		*p++ = 'D';
	    }
	    else if (c==201 || c==202 || c==203 || c==204) {
		*p++ = 'E';
	    }
	    else if (c==205 || c==206) {
		*p++ = 'I';
	    }
#ifndef WIN32
	    else if (c==163 || c==165 || c==197) {
		*p++ = 'L';
	    }
#else
	    else if (c==163 || c==188 || c==197) {
		*p++ = 'L';
	    }
#endif
	    else if (c==209 || c==210) {
		*p++ = 'N';
	    }
	    else if (c==211 || c==212 || c==213 || c==214) {
		*p++ = 'O';
	    }
	    else if (c==192 || c==216) {
		*p++ = 'R';
	    }
#ifndef WIN32
	    else if (c==166 || c==169 || c==170) {
		*p++ = 'S';
	    }
#else
	    else if (c==138 || c==140 || c==170) {
		*p++ = 'S';
	    }
#endif
#ifndef WIN32
	    else if (c==171 || c==222) {
		*p++ = 'T';
	    }
#else
	    else if (c==141 || c==222) {
		*p++ = 'T';
	    }
#endif
	    else if (c==217 || c==218 || c==219 || c==220) {
		*p++ = 'U';
	    }
	    else if (c==221) {
		*p++ = 'Y';
	    }
#ifndef WIN32
	    else if (c==172 || c==174 || c==175) {
		*p++ = 'Z';
	    }
#else
	    else if (c==142 || c==143 || c==175) {
		*p++ = 'Z';
	    }
#endif
#ifndef WIN32
	    else if (c==177 || c==225 || c==226 || c==227 || c==228) {
		*p++ = 'a';
	    }
#else
	    else if (c==185 || c==225 || c==226 || c==227 || c==228) {
		*p++ = 'a';
	    }
#endif
	    else if (c==230 || c==231 || c==232) {
		*p++ = 'c';
	    }
	    else if (c==239 || c==240) {
		*p++ = 'd';
	    }
	    else if (c==233 || c==234 || c==235 || c==236) {
		*p++ = 'e';
	    }
	    else if (c==237 || c==238) {
		*p++ = 'i';
	    }
#ifndef WIN32
	    else if (c==179 || c==181 || c==229) {
		*p++ = 'l';
	    }
#else
	    else if (c==179 || c==190 || c==229) {
		*p++ = 'l';
	    }
#endif
	    else if (c==241 || c==242) {
		*p++ = 'n';
	    }
	    else if (c==243 || c==244 || c==245 || c==246) {
		*p++ = 'o';
	    }
	    else if (c==224 || c==248) {
		*p++ = 'r';
	    }
#ifndef WIN32
	    else if (c==182 || c==185 || c==186 || c==223) {
		*p++ = 's';
	    }
#else
	    else if (c==154 || c==156 || c==186 || c==223) {
		*p++ = 's';
	    }
#endif
#ifndef WIN32
	    else if (c==187 || c==254) {
		*p++ = 't';
	    }
#else
	    else if (c==157 || c==254) {
		*p++ = 't';
	    }
#endif
	    else if (c==249 || c==250 || c==251 || c==252) {
		*p++ = 'u';
	    }
	    else if (c==253) {
		*p++ = 'y';
	    }
#ifndef WIN32
	    else if (c==188 || c==190 || c==191) {
		*p++ = 'z';
	    }
#else
	    else if (c==158 || c==159 || c==191) {
		*p++ = 'z';
	    }
#endif
	    q++;
	}
    }

    /* terminate the output (this is all that happens when @latin
       is neither 1 nor 2) */
    *p = '\0';
}
874 
875 /* If @maxlen > 0 we limit the write to @targ to at most
876    @maxlen bytes (excluding the terminating nul byte).
877    If @sub > 0 we write this byte to @targ in place of
878    UTF-8 characters that we can't represent in ASCII,
879    provided they are lower than 0x0180.
880 */
881 
u8_to_ascii_convert(char * targ,const char * src,int maxlen,char sub)882 char *u8_to_ascii_convert (char *targ, const char *src,
883 			   int maxlen, char sub)
884 {
885     int prevspace = 0;
886     const char *q = src;
887     char *p = targ;
888     gunichar u;
889     int c, skip;
890     int len = 0;
891 
892     *p = '\0';
893 
894     /* If sub == 0 we assume we're doing varnames and
895        so we skip all characters that are not valid in
896        a gretl varname. But if sub > 0 we pass through
897        all printable ASCII characters.
898     */
899 
900     while (q && *q) {
901 	skip = 0;
902 	c = *q;
903 	if (sub > 0 && ((c >= 32 && c <= 126) || c == 9 || c == 10)) {
904 	    /* ASCII printables */
905 	    *p++ = c;
906 	    q++;
907 	} else if (c >= 0x0030 && c <= 0x0039) {
908 	    /* digits 0-9 */
909 	    *p++ = c;
910 	    q++;
911 	} else if (c >= 0x0041 && c <= 0x005A) {
912 	    /* upper-case ASCII letters */
913 	    *p++ = c;
914 	    q++;
915 	} else if (c >= 0x0061 && c <= 0x007A) {
916 	    /* lower-case ASCII letters */
917 	    *p++ = c;
918 	    q++;
919 	} else if (c == 0x005F) {
920 	    /* underscore */
921 	    *p++ = c;
922 	    q++;
923 	} else if (c == 0x0020) {
924 	    if (!prevspace) {
925 		prevspace = 1;
926 		*p++ = '_';
927 	    } else {
928 		skip = 1;
929 	    }
930 	    q++;
931 	} else {
932 	    /* handle Latin-1 and Latin-2, only */
933 	    u = g_utf8_get_char(q);
934 	    if (u >= 0x0180) {
935 		skip = 1; /* can't handle */
936 	    } else if ((u >= 0x00C0 && u <= 0x00C6) || u == 0x0102 || u == 0x0104) {
937 		*p++ = 'A';
938 	    } else if (u == 0x00C7 || u == 0x0106 || u == 0x010C) {
939 		*p++ = 'C';
940 	    } else if ((u >= 0x00C8 && u <= 0x00CB) || u == 0x0118 || u == 0x011A) {
941 		*p++ = 'E';
942 	    } else if (u >= 0x00CC && u <= 0x00CF) {
943 		*p++ = 'I';
944 	    } else if (u == 0x00D0 || u == 0x010E || u == 0x0110 || u == 0x010E) {
945 		*p++ = 'D';
946 	    } else if (u == 0x00D1 || u == 0x0143 || u == 0x0147) {
947 		*p++ = 'N';
948 	    } else if (u == 0x00D8 || (u >= 0x00D2 && u <= 0x00D6) || u == 0x0150) {
949 		*p++ = 'O';
950 	    } else if ((u >= 0x00D9 && u <= 0x00DC) || u == 0x016E || u == 0x0170) {
951 		*p++ = 'U';
952 	    } else if (u == 0x00DD) {
953 		*p++ = 'Y';
954 	    } else if (u == 0x00DE || u == 0x0164) {
955 		*p++ = 'T';
956 	    } else if (u == 0x00DF) {
957 		*p++ = 's';
958 	    } else if ((u >= 0x00E0 && u <= 0x00E6) || u == 0x0103) {
959 		*p++ = 'a';
960 	    } else if (u == 0x00E7 || u == 0x0107) {
961 		*p++ = 'c';
962 	    } else if ((u >= 0x00E8 && u <= 0x00EB) || u == 0x0119 || u == 0x011B) {
963 		*p++ = 'e';
964 	    } else if (u >= 0x00EC && u <= 0x00EF) {
965 		*p++ = 'i';
966 	    } else if (u == 0x00F0 || u == 0x0111 || u == 0x010F) {
967 		*p++ = 'd';
968 	    } else if (u == 0x00F1 || u == 0x0144 || u == 0x0148) {
969 		*p++ = 'n';
970 	    } else if (u == 0x00F8 || u == 0x0151 || (u >= 0x00F2 && u <= 0x00F6)) {
971 		*p++ = 'o';
972 	    } else if ((u >= 0x00F9 && u <= 0x00FC) || u == 0x016F || u == 0x0171) {
973 		*p++ = 'u';
974 	    } else if (u == 0x00FD || u == 0x00FF) {
975 		*p++ = 'y';
976 	    } else if (u == 0x00FE || u == 0x0163) {
977 		*p++ = 't';
978 	    } else if (u == 0x0141 || u == 0x013D || u == 0x0139) {
979 		*p++ = 'L';
980 	    } else if (u == 0x0142 || u == 0x013E || u == 0x013A) {
981 		*p++ = 'l';
982 	    } else if (u == 0x0154 || u == 0x0158) {
983 		*p++ = 'R';
984 	    } else if (u == 0x0155 || u == 0x0159) {
985 		*p++ = 'r';
986 	    } else if (u == 0x0160 || u == 0x015E) {
987 		*p++ = 'S';
988 	    } else if (u == 0x0161 || u == 0x015F) {
989 		*p = 's';
990 	    } else if (u == 0x0179 || u == 0x017D || u == 0x0178) {
991 		*p = 'Z';
992 	    } else if (u == 0x017A || u == 0x017E || u == 0x017C) {
993 		*p = 'z';
994 	    } else if (sub > 0) {
995 		*p = sub;
996 	    } else {
997 		skip = 1;
998 	    }
999 	    q = g_utf8_next_char(q);
1000 	}
1001 	if (c != 0x0020) {
1002 	    prevspace = 0;
1003 	}
1004 	if (!skip) len++;
1005 	if (maxlen > 0 && len == maxlen) {
1006 	    break;
1007 	}
1008     }
1009 
1010     *p = '\0';
1011 
1012     return targ;
1013 }
1014 
/* Convert the single-byte encoded string @s to ASCII, in place,
   by way of a temporary buffer. @latin should be 1 or 2; any
   other value is treated as 1. Returns @s, or NULL if the
   temporary buffer could not be allocated (in which case @s is
   left untouched).
*/

static char *real_iso_to_ascii (char *s, int latin)
{
    char *scratch = malloc(strlen(s) + 1);

    if (scratch != NULL) {
	/* fallback?? -- unrecognized values are treated as Latin-1 */
	int mode = (latin == 1 || latin == 2) ? latin : 1;

	iso_to_ascii_translate(scratch, s, mode);
	strcpy(s, scratch);
	free(scratch);

	return s;
    }

    return NULL;
}
1036 
/* Convert the string @s to ASCII in place, using the @latin = 1
   replacement table. Returns whatever real_iso_to_ascii() returns:
   @s, or NULL on allocation failure.
*/

char *iso_to_ascii (char *s)
{
    char *ret = real_iso_to_ascii(s, 1);

    return ret;
}
1041 
/* Write to @targ an ASCII version of the string @s, using the
   @latin = 2 replacement table. Note that @len is accepted but
   not consulted: the write to @targ is not bounded by it.
   Returns @targ.
*/

char *sprint_l2_to_ascii (char *targ, const char *s, size_t len)
{
    (void) len; /* unused */

    iso_to_ascii_translate(targ, s, 2);

    return targ;
}
1048 
/* Convert the UTF-8 string @s, in place, to an ASCII form of at
   most 31 bytes in which only characters acceptable in a variable
   name are retained (see u8_to_ascii_convert() with @sub = 0).
   If the temporary buffer cannot be allocated @s is left as is.
   Returns @s in all cases.
*/

char *asciify_utf8_varname (char *s)
{
    char *scratch = malloc(32);

    if (scratch == NULL) {
	return s;
    }

    u8_to_ascii_convert(scratch, s, 31, 0);
    strcpy(s, scratch);
    free(scratch);

    return s;
}
1061 
1062 /* Convert from UTF-8 text in @s to a form suitable for
1063    inclusion in RTF, where non-ASCII characters are
1064    recoded to escaped Unicode numbering. Return the
1065    converted text in a newly allocated string.
1066 */
1067 
utf8_to_rtf(const char * s)1068 char *utf8_to_rtf (const char *s)
1069 {
1070     const char *nextp, *p = s;
1071     short int k;
1072     PRN *prn;
1073     char *ret = NULL;
1074     int err = 0;
1075 
1076     prn = gretl_print_new(GRETL_PRINT_BUFFER, &err);
1077     if (prn == NULL) {
1078 	return NULL;
1079     }
1080 
1081     while (*p) {
1082 	nextp = g_utf8_next_char(p);
1083 	if (nextp - p > 1) {
1084 	    k = (short) g_utf8_get_char(p);
1085 	    pprintf(prn, "\\u%d?", k);
1086 	} else {
1087 	    pputc(prn, *p);
1088 	}
1089 	p = nextp;
1090     }
1091 
1092     ret = gretl_print_steal_buffer(prn);
1093     gretl_print_destroy(prn);
1094 
1095     return ret;
1096 }
1097 
1098 #define ascii_ctrl(a) (a == '\t' || a == '\n' || \
1099                        a == '\r' || a == CTRLZ)
1100 
gretl_is_ascii(const char * buf)1101 int gretl_is_ascii (const char *buf)
1102 {
1103     int a;
1104 
1105     while (*buf) {
1106 	a = *buf;
1107 	if (a > 126 || (a < 32 && !(ascii_ctrl(a)))) {
1108 	    return 0;
1109 	}
1110 	buf++;
1111     }
1112 
1113     return 1;
1114 }
1115 
/* For printing @str in a field @width visible characters wide when
   @str may contain multi-byte UTF-8 characters: since field widths
   are counted in bytes, each byte beyond the first of a character
   takes up width without being visible. Returns @width plus the
   number of such "invisible" bytes, i.e. the byte length of @str
   minus its length in characters.
*/

int get_utf_width (const char *str, int width)
{
    int nbytes = (int) strlen(str);
    int nchars = (int) g_utf8_strlen(str, -1);

    return width + (nbytes - nchars);
}
1129 
/* Returns the byte length of @str plus its number of "invisible"
   bytes (byte length minus length in UTF-8 characters), that is,
   twice the byte length less the character count.
*/

int get_translated_width (const char *str)
{
    int nbytes = (int) strlen(str);
    int nchars = (int) g_utf8_strlen(str, -1);

    return nbytes + (nbytes - nchars);
}
1138 
1139 /* utility functionality: recoding of an entire file:
1140    we start with a couple of static "helpers"
1141 */
1142 
file_get_content(const char * fname,gsize * bytes,PRN * prn,int * err)1143 static gchar *file_get_content (const char *fname,
1144 				gsize *bytes,
1145 				PRN *prn,
1146 				int *err)
1147 {
1148     GError *gerr = NULL;
1149     gchar *buf = NULL;
1150     int ok;
1151 
1152     ok = g_file_get_contents(fname, &buf, bytes, &gerr);
1153 
1154     if (ok) {
1155 	pprintf(prn, "got content, %" G_GSIZE_FORMAT " bytes\n", *bytes);
1156     } else {
1157 	*err = E_FOPEN;
1158 	if (gerr != NULL) {
1159 	    gretl_errmsg_set(gerr->message);
1160 	    g_error_free(gerr);
1161 	}
1162     }
1163 
1164     return buf;
1165 }
1166 
/* Write the @buflen bytes at @buf to the file @fname by means of
   g_file_set_contents(). Returns 0 on success; on failure returns
   E_FOPEN, having recorded the GLib error message (if any) as the
   gretl error message.
*/

static int file_set_content (const char *fname,
			     const gchar *buf,
			     gsize buflen)
{
    GError *gerr = NULL;

    if (g_file_set_contents(fname, buf, buflen, &gerr)) {
	return 0;
    }

    if (gerr != NULL) {
	gretl_errmsg_set(gerr->message);
	g_error_free(gerr);
    }

    return E_FOPEN;
}
1186 
glib_recode_buffer(const char * buf,const char * from_set,const char * to_set,gsize bytes,gsize * written,int * err)1187 static gchar *glib_recode_buffer (const char *buf,
1188 				  const char *from_set,
1189 				  const char *to_set,
1190 				  gsize bytes,
1191 				  gsize *written,
1192 				  int *err)
1193 {
1194     gchar *trbuf = NULL;
1195     GError *gerr = NULL;
1196     gsize got = 0;
1197 
1198     trbuf = g_convert(buf, bytes, to_set, from_set,
1199 		      &got, written, &gerr);
1200 
1201     if (gerr != NULL) {
1202 	*err = E_DATA;
1203 	gretl_errmsg_set(gerr->message);
1204 	g_error_free(gerr);
1205     }
1206 
1207     return trbuf;
1208 }
1209 
1210 /**
1211  * gretl_recode_file:
1212  * @path1: path to original file.
1213  * @path2: path to file to be written.
1214  * @from_set: the codeset of the original file.
1215  * @to_set: the codeset for the recoded file.
1216  * @prn: gretl printer (for a few comments) or NULL.
1217  *
1218  * Returns: 0 on success or non-zero code on error.
1219  */
1220 
gretl_recode_file(const char * path1,const char * path2,const char * from_set,const char * to_set,PRN * prn)1221 int gretl_recode_file (const char *path1, const char *path2,
1222 		       const char *from_set, const char *to_set,
1223 		       PRN *prn)
1224 {
1225     gchar *buf = NULL;
1226     gsize bytes = 0;
1227     int err = 0;
1228 
1229     /* get entire content of original file */
1230     buf = file_get_content(path1, &bytes, prn, &err);
1231 
1232     if (!err) {
1233 	gsize written = 0;
1234 	gchar *trbuf = glib_recode_buffer(buf, from_set, to_set,
1235 					  bytes, &written, &err);
1236 
1237 	if (!err) {
1238 	    /* write recoded text to file */
1239 	    pprintf(prn, "recoded: %" G_GSIZE_FORMAT " bytes\n", written);
1240 	    err = file_set_content(path2, trbuf, written);
1241 	}
1242 	g_free(trbuf);
1243     }
1244 
1245     g_free(buf);
1246 
1247     return err;
1248 }
1249