1 /*************************************************************************/
2 /* Copyright (c) 2004                                                    */
3 /* Daniel Sleator, David Temperley, and John Lafferty                    */
4 /* Copyright 2008, 2009, 2013 Linas Vepstas                              */
5 /* All rights reserved                                                   */
6 /*                                                                       */
7 /* Use of the link grammar parsing system is subject to the terms of the */
8 /* license set forth in the LICENSE file included with this software.    */
9 /* This license allows free redistribution and use in source and binary  */
10 /* forms, with or without modification, subject to certain conditions.   */
11 /*                                                                       */
12 /*************************************************************************/
13 
14 #include <ctype.h>
15 #include <errno.h>
16 #include <limits.h>
17 #ifdef _WIN32
18 #define _CRT_RAND_S
19 #endif /* _WIN32 */
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/types.h>
23 #include <stdarg.h>
24 #include <locale.h>
25 #ifdef HAVE_XLOCALE_H
26 #include <xlocale.h>
27 #endif /* HAVE_XLOCALE_H */
28 
29 #ifndef _WIN32
30 	// #include <unistd.h>
31 	#include <langinfo.h>
32 #else
33 	#include <windows.h>
34 #endif /* _WIN32 */
35 
36 #include "utilities.h"
37 
38 /* This file contains general utilities that fix, enhance OS-provided
39  * API's, esp ones that the OS forgot to provide, or managed to break.
40  */
41 
42 /* ============================================================= */
43 /* String utilities */
44 
/* Windows, POSIX and GNU have different ideas about strerror_r(). Hence
 * we use our own function, which wraps whichever system function is
 * available and behaves consistently. It doesn't try to mimic exactly
 * any version of strerror_r(). */
49 #ifdef _WIN32
/**
 * Windows flavor: write the message for err_no into buf (len bytes)
 * using strerror_s().
 */
void lg_strerror(int err_no, char *buf, size_t len)
{
	const int rc = strerror_s(buf, len, err_no);

	if (0 == rc) return;

	/* The failed call is expected to have set errno, so describe that
	 * failure instead of the error originally asked for. */
	strerror_s(buf, len, errno);
}
55 #else
56 #if HAVE_STRERROR_R
57 
58 #if STRERROR_R_CHAR_P
/* Using the GNU version. */
/**
 * Write the message for err_no into buf (len bytes), NUL-terminated.
 *
 * GNU strerror_r() returns a pointer to the message; that pointer may
 * be buf itself or some other string, so the result is copied into buf
 * only when it lives elsewhere.
 *
 * @param err_no Error number to describe.
 * @param buf    Destination buffer.
 * @param len    Size of buf in bytes; nothing is written if it is 0.
 */
void lg_strerror(int err_no, char *buf, size_t len)
{
	const char *errstr;

	if (0 == len) return; /* No room even for the terminating NUL. */

	errstr = strerror_r(err_no, buf, len);

	/* Copying a buffer onto itself is undefined for strncpy(), so skip
	 * the copy when the message is already in place. */
	if (errstr != buf)
		strncpy(buf, errstr, len);
	buf[len-1] = '\0';
}
66 #else /* !STRERROR_R_CHAR_P */
/* Using the XSI version. */
/**
 * Write the message for err_no into buf (len bytes). If strerror_r()
 * flags the number as invalid, "Unknown error <n>" is written instead.
 * Note that errno is reset to 0 as part of the check.
 */
void lg_strerror(int err_no, char *buf, size_t len)
{
	int rc;

	errno = 0;
	rc = strerror_r(err_no, buf, len);

	/* EINVAL is accepted from either the return value or errno. */
	if ((EINVAL != rc) && (EINVAL != errno)) return;

	snprintf(buf, len, "Unknown error %d", err_no);
}
74 #endif /* STRERROR_R_CHAR_P */
75 
76 #else /* !STRERROR_R */
/* No strerror_r()??? No thread-safe error message - use a workaround.
 * (FIXME Could check if threads are not supported and use strerror(),
 * else protect strerror().) */
/**
 * Fallback flavor: only the numeric error code is reported, as
 * "Error <n>", truncated to fit len bytes.
 */
void lg_strerror(int err_no, char *buf, size_t len)
{
	static const char fallback_fmt[] = "Error %d";

	(void)snprintf(buf, len, fallback_fmt, err_no);
}
84 #endif /* STRERROR_R */
85 #endif /* _WIN32 */
86 
/**
 * NULL-tolerant strdup(): returns NULL for a NULL argument, otherwise
 * whatever strdup() returns for it.
 */
char *safe_strdup(const char *u)
{
	return (NULL == u) ? NULL : strdup(u);
}
93 
94 /**
95  * A version of strlcpy, for those systems that don't have it.
96  */
97 /*	$OpenBSD: strlcpy.c,v 1.12 2015/01/15 03:54:12 millert Exp $	*/
98 /*
99  * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
100  *
101  * Permission to use, copy, modify, and distribute this software for any
102  * purpose with or without fee is hereby granted, provided that the above
103  * copyright notice and this permission notice appear in all copies.
104  *
105  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
106  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
107  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
108  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
109  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
110  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
111  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
112  */
/*
 * Copy string src to buffer dst of size dsize.  At most dsize-1
 * chars will be copied.  Always NUL terminates (unless dsize == 0).
 * Returns strlen(src); if retval >= dsize, truncation occurred.
 */
size_t
lg_strlcpy(char * restrict dst, const char * restrict src, size_t dsize)
{
	const char *s = src;
	size_t room;

	/* Copy while there is room for a char plus the terminator. */
	for (room = dsize; room > 1; room--)
	{
		if ('\0' == (*dst++ = *s++))
			return (size_t)(s - src - 1); /* all of src fit */
	}

	/* Out of room: terminate dst (if it has any space at all) and
	 * walk the rest of src just to measure it. */
	if (0 != dsize)
		*dst = '\0';
	while ('\0' != *s)
		s++;

	return (size_t)(s - src); /* count does not include NUL */
}
142 
/**
 * Catenates as much of v onto u as fits, where u is a buffer of usize
 * bytes. On return u is NUL-terminated within those usize bytes.
 *
 * @param u     Destination buffer holding a string.
 * @param v     NUL-terminated string to append.
 * @param usize Total size of u in bytes. If it is 0, u is not touched.
 *
 * If u holds no NUL within its first usize bytes, nothing is appended
 * and u is truncated to usize-1 chars.
 */
void safe_strcat(char *u, const char *v, size_t usize)
{
	const char *end;
	size_t ulen;

	if (0 == usize) return;

	/* Bounded search, so an unterminated u cannot be overrun. */
	end = memchr(u, '\0', usize);
	if (NULL == end)
	{
		u[usize-1] = '\0';
		return;
	}

	/* ulen < usize here, hence the subtraction cannot wrap around. */
	ulen = (size_t)(end - u);
	strncat(u + ulen, v, usize - ulen - 1);
}
153 
154 #ifndef HAVE_STRNDUP
/* Emulates glibc's strndup() */
/**
 * Duplicate at most size bytes of str into freshly allocated memory.
 *
 * Only the first size bytes of str are examined, so str does not need
 * to be NUL-terminated if it holds at least size bytes.
 *
 * @return A NUL-terminated copy to be released with free(), or NULL if
 *         the allocation failed.
 */
char *
strndup (const char *str, size_t size)
{
	const char *nul = memchr(str, '\0', size);
	size_t len = (NULL == nul) ? size : (size_t)(nul - str);
	char *result = malloc(len + 1);

	if (NULL == result) return NULL;

	memcpy(result, str, len);
	result[len] = '\0';
	return result;
}
171 #endif /* !HAVE_STRNDUP */
172 
173 #ifndef HAVE_STRTOK_R
174 /*
175  * public domain strtok_r() by Charlie Gordon
176  * from comp.lang.c  9/14/2007
177  *     http://groups.google.com/group/comp.lang.c/msg/2ab1ecbb86646684
178  *
179  *     Declaration that it's public domain:
180  *     http://groups.google.com/group/comp.lang.c/msg/7c7b39328fefab9c
181  */
/**
 * Reentrant tokenizer.
 *
 * @param str   String to scan, or NULL to continue from *nextp.
 * @param delim Set of delimiter characters.
 * @param nextp Scan position, updated whenever a token is returned.
 * @return The next token (NUL-terminated in place), or NULL when only
 *         delimiters (or nothing) remain.
 */
char* strtok_r(char *str, const char *delim, char **nextp)
{
	char *cursor = (NULL != str) ? str : *nextp;
	char *token;

	/* Skip the delimiters in front of the token. */
	cursor += strspn(cursor, delim);
	if ('\0' == *cursor) return NULL;

	token = cursor;
	cursor += strcspn(token, delim);

	/* Cut the token off, unless it already ends the string. */
	if ('\0' != *cursor)
	{
		*cursor = '\0';
		cursor++;
	}
	*nextp = cursor;

	return token;
}
196 #endif /* !HAVE_STRTOK_R */
197 
198 /* ============================================================= */
199 /* UTF8 utilities */
200 
201 #ifdef _WIN32
202 /**
203  * (Experimental) Implementation of mbrtowc for Windows.
204  * This is required because the other, commonly available implementations
205  * seem to not work very well, based on user reports.  Someone who is
206  * really, really good at windows programming needs to review this stuff!
207  */
lg_mbrtowc(wchar_t * pwc,const char * s,size_t n,mbstate_t * ps)208 size_t lg_mbrtowc(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
209 {
210 	int nb, nb2;
211 
212 	if (NULL == s) return 0;
213 	if (0 == n) return -2;
214 	if (0 == *s) { *pwc = 0; return 0; }
215 
216 	nb = utf8_charlen(s);
217 	if (0 == nb) return 0;
218 	if (0 > nb) return nb;
219 	nb2 = MultiByteToWideChar(CP_UTF8, 0, s, nb, NULL, 0);
220 	nb2 = MultiByteToWideChar(CP_UTF8, 0, s, nb, pwc, nb2);
221 	if (0 == nb2) return (size_t)-1;
222 	return nb;
223 }
224 
/**
 * Emulate rand_r() using rand_s() in a way that is enough for our needs.
 * Windows doesn't have rand_r(), and its rand_s() is different: It
 * returns an error indication and not the random number like rand_r().
 * The value it returns is through its argument.
 *
 * Note that "#define _CRT_RAND_S" is needed before "#include <stdlib.h>".
 *
 * @param s Receives the value produced by rand_s(), reduced to the
 *          range [0, INT_MAX]. The rand_s() status is not checked.
 * @return The same reduced value.
 */
int rand_r(unsigned int *s)
{
	rand_s(s);

	/* Mask instead of subtracting INT_MAX: UINT_MAX - INT_MAX is still
	 * above INT_MAX and would turn into a negative return value. */
	*s &= (unsigned int)INT_MAX;

	return (int)*s;
}
240 #endif /* _WIN32 */
241 
wctomb_check(char * s,wchar_t wc)242 static int wctomb_check(char *s, wchar_t wc)
243 {
244 	int nr;
245 #ifdef _WIN32
246 	nr = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, NULL, 0, NULL, NULL);
247 	nr = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, s, nr, NULL, NULL);
248 	if (0 == nr) return -1;
249 #else
250 	mbstate_t mbss;
251 	memset(&mbss, 0, sizeof(mbss));
252 	nr = wcrtomb(s, wc, &mbss);
253 	if (nr < 0) {
254 		prt_error("Fatal Error: unknown character set %s\n", nl_langinfo(CODESET));
255 		exit(1);
256 	}
257 #endif /* _WIN32 */
258 	return nr;
259 }
260 
261 /**
262  * Downcase the first letter of the word.
263  * XXX FIXME This works 'most of the time', but is not technically correct.
264  * This is because towlower() and towupper() are locale dependent, and also
265  * because the byte-counts might not match up, e.g. German ß and SS.
266  * The correct long-term fix is to use ICU or glib g_utf8_strup(), etc.
267  */
downcase_utf8_str(char * to,const char * from,size_t usize,locale_t locale)268 void downcase_utf8_str(char *to, const char * from, size_t usize, locale_t locale)
269 {
270 	wchar_t c;
271 	int i, nbl, nbh;
272 	char low[MB_LEN_MAX];
273 	mbstate_t mbs;
274 
275 	/* Make sure it doesn't contain garbage in case of an error */
276 	if (to != from) strcpy(to, from);
277 
278 	memset(&mbs, 0, sizeof(mbs));
279 	nbh = mbrtowc (&c, from, MB_CUR_MAX, &mbs);
280 	if (nbh < 0)
281 	{
282 		prt_error("Error: Invalid UTF-8 string!\n");
283 		return;
284 	}
285 	c = towlower_l(c, locale);
286 	nbl = wctomb_check(low, c);
287 
288 	/* Check for error on an in-place copy */
289 	if ((nbh < nbl) && (to == from))
290 	{
291 		/* I'm to lazy to fix this */
292 		prt_error("Error: can't downcase UTF-8 string!\n");
293 		return;
294 	}
295 
296 	/* Downcase */
297 	for (i=0; i<nbl; i++) { to[i] = low[i]; }
298 
299 	if ((nbh == nbl) && (to == from)) return;
300 
301 	from += nbh;
302 	to += nbl;
303 	lg_strlcpy(to, from, usize-nbl);
304 }
305 
306 #if 0
307 /**
308  * Upcase the first letter of the word.
309  * XXX FIXME This works 'most of the time', but is not technically correct.
310  * This is because towlower() and towupper() are locale dependent, and also
311  * because the byte-counts might not match up, e.g. German ß and SS.
312  * The correct long-term fix is to use ICU or glib g_utf8_strup(), etc.
313  */
314 void upcase_utf8_str(char *to, const char * from, size_t usize, locale_t locale)
315 {
316 	wchar_t c;
317 	int i, nbl, nbh;
318 	char low[MB_LEN_MAX];
319 	mbstate_t mbs;
320 
321 	memset(&mbs, 0, sizeof(mbs));
322 	nbh = mbrtowc (&c, from, MB_CUR_MAX, &mbs);
323 	if (nbh < 0)
324 	{
325 		prt_error("Error: Invalid UTF-8 string!\n");
326 		return;
327 	}
328 	c = towupper_l(c, locale);
329 	nbl = wctomb_check(low, c);
330 
331 	/* Check for error on an in-place copy */
332 	if ((nbh < nbl) && (to == from))
333 	{
334 		/* I'm to lazy to fix this */
335 		prt_error("Error: can't upcase UTF-8 string!\n");
336 		return;
337 	}
338 
339 	/* Upcase */
340 	for (i=0; i<nbl; i++) { to[i] = low[i]; }
341 
342 	if ((nbh == nbl) && (to == from)) return;
343 
344 	from += nbh;
345 	to += nbl;
346 	lg_strlcpy(to, from, usize-nbl);
347 }
348 #endif
349 
350 #ifdef NO_ALIGNED_MALLOC
351 #if __GNUC__
352 #warning No aligned alloc found (using malloc() instead).
353 #endif
354 #endif /* NO_ALIGNED_MALLOC */
355 
356 #ifdef HAVE_POSIX_MEMALIGN
/**
 * aligned_alloc() built on posix_memalign().
 *
 * errno is set to the posix_memalign() return code (0 on success).
 *
 * @return The allocated block, or NULL if posix_memalign() failed.
 */
void *aligned_alloc(size_t alignment, size_t size)
{
	void *ptr = NULL;

	errno = posix_memalign(&ptr, alignment, size);

	/* After a failure the value of ptr cannot be relied on. */
	if (0 != errno) return NULL;

	return ptr;
}
363 #endif /* HAVE_POSIX_MEMALIGN */
364 
365 /* ============================================================= */
/* Memory alloc routines below. These routines attempt to keep
 * track of how much space is getting used during a parse.
 *
 * This code is probably obsolescent, and should probably be dumped.
 * No one (that I know of) looks at the space usage; it's one of the
 * few areas that needs pthreads -- it would be great to just get
 * rid of it (and thus get rid of pthreads).
 */
374 
375 #ifdef TRACK_SPACE_USAGE
/* Allocation statistics, kept separately for xalloc()/xfree() and for
 * exalloc()/exfree() ("external"). */
typedef struct
{
	/* xalloc() / xfree() */
	size_t max_space_used;           /* highest space_in_use seen */
	size_t space_in_use;             /* bytes xalloc'ed minus bytes xfree'd */
	size_t num_xallocs;              /* number of xalloc() calls */
	size_t num_xfrees;               /* number of xfree() calls */
	size_t max_outstanding_xallocs;  /* highest (num_xallocs - num_xfrees) */

	/* exalloc() / exfree() */
	size_t max_external_space_used;  /* highest external_space_in_use seen */
	size_t external_space_in_use;    /* bytes exalloc'ed minus bytes exfree'd */
	size_t num_exallocs;             /* number of exalloc() calls */
	size_t num_exfrees;              /* number of exfree() calls */
	size_t max_outstanding_exallocs; /* highest (num_exallocs - num_exfrees) */
} space_t;
389 
390 static TLS space_t space;
do_init_memusage(void)391 static space_t * do_init_memusage(void)
392 {
393 	space_t *s = &space;
394 
395 	s->max_space_used = 0;
396 	s->space_in_use = 0;
397 	s->num_xallocs = 0;
398 	s->num_xfrees = 0;
399 	s->max_outstanding_xallocs = 0;
400 	s->max_external_space_used = 0;
401 	s->external_space_in_use = 0;
402 	s->num_exallocs = 0;
403 	s->num_exfrees = 0;
404 	s->max_outstanding_exallocs = 0;
405 
406 	return s;
407 }
408 
/**
 * Reset the memory-usage counters. Only the first call has an effect.
 */
void init_memusage(void)
{
	static bool mem_inited = false;

	if (!mem_inited)
	{
		mem_inited = true;
		do_init_memusage();
	}
}
416 
getspace(void)417 static inline space_t *getspace(void)
418 {
419 	return &space;
420 }
421 
422 /**
423  * space used but not yet freed during parse
424  */
get_space_in_use(void)425 size_t get_space_in_use(void)
426 {
427 	return getspace()->space_in_use;
428 }
429 
430 /**
431  * maximum space used during the parse
432  */
get_max_space_used(void)433 size_t get_max_space_used(void)
434 {
435 	return getspace()->max_space_used;
436 }
437 #else /* TRACK_SPACE_USAGE */
/* Space tracking is compiled out: nothing to set up, nothing to report. */
void init_memusage(void)
{
}

size_t get_space_in_use(void)
{
	return 0;
}

size_t get_max_space_used(void)
{
	return 0;
}
441 #endif /* TRACK_SPACE_USAGE */
442 
/**
 * alloc some memory, and keep track of the space allocated.
 *
 * @param size Number of bytes to allocate.
 * @return The malloc() result. It is never NULL for a non-zero size:
 *         a failed allocation prints a message and aborts.
 *
 * With TRACK_SPACE_USAGE the request is added to the xalloc counters;
 * such memory is released with xfree().
 */
void * xalloc(size_t size)
{
	void * p = malloc(size);

	/* A NULL result is an error only if something was asked for. */
	if ((p == NULL) && (size != 0))
	{
		prt_error("Fatal Error: Ran out of space. (int)\n");
		abort(); /* Does not return. */
	}

#ifdef TRACK_SPACE_USAGE
	space_t *s = getspace();
	s->space_in_use += size;
	if (s->max_space_used < s->space_in_use) s->max_space_used = s->space_in_use;
	s->num_xallocs ++;
	if (s->max_outstanding_xallocs < (s->num_xallocs - s->num_xfrees))
		s->max_outstanding_xallocs = (s->num_xallocs - s->num_xfrees);
#endif /* TRACK_SPACE_USAGE */

	return p;
}
467 
468 #ifdef TRACK_SPACE_USAGE
xfree(void * p,size_t size)469 void xfree(void * p, size_t size)
470 {
471 	space_t *s = getspace();
472 	s->space_in_use -= size;
473 	s->num_xfrees ++;
474 
475 	free(p);
476 }
477 #endif /* TRACK_SPACE_USAGE */
478 
/**
 * alloc some memory, counted on the "external" counters.
 *
 * @param size Number of bytes to allocate.
 * @return The malloc() result. It is never NULL for a non-zero size:
 *         a failed allocation prints a message and aborts.
 *
 * With TRACK_SPACE_USAGE the request is added to the exalloc counters;
 * such memory is released with exfree().
 */
void * exalloc(size_t size)
{
	void * p = malloc(size);

	/* A NULL result is an error only if something was asked for. */
	if ((p == NULL) && (size != 0))
	{
		prt_error("Fatal Error: Ran out of space. (ext)\n");
		abort(); /* Does not return. */
	}

#ifdef TRACK_SPACE_USAGE
	space_t *s = getspace();
	s->external_space_in_use += size;
	if (s->max_external_space_used < s->external_space_in_use)
		s->max_external_space_used = s->external_space_in_use;
	s->num_exallocs ++;
	if (s->max_outstanding_exallocs < (s->num_exallocs - s->num_exfrees))
		s->max_outstanding_exallocs = (s->num_exallocs - s->num_exfrees);
#endif /* TRACK_SPACE_USAGE */

	return p;
}
500 
501 #ifdef TRACK_SPACE_USAGE
exfree(void * p,size_t size)502 void exfree(void * p, size_t size)
503 {
504 	space_t *s = getspace();
505 	s->external_space_in_use -= size;
506 	s->num_exfrees ++;
507 	free(p);
508 }
509 #endif /* TRACK_SPACE_USAGE */
510 
511 /* =========================================================== */
512 /* Simple, cheap, easy dynamic string. */
513 
dyn_str_new(void)514 dyn_str* dyn_str_new(void)
515 {
516 	dyn_str *ds = malloc(sizeof(dyn_str));
517 	ds->len = 250;
518 	ds->end = 0;
519 	ds->str = malloc(ds->len);
520 	ds->str[0] = 0x0;
521 	return ds;
522 }
523 
/**
 * Free a dynamic string together with its character buffer.
 */
void dyn_str_delete(dyn_str* ds)
{
	char *buffer = ds->str;

	free(buffer);
	free(ds);
}
529 
/**
 * Free the dyn_str wrapper but keep its character buffer.
 * @return The buffer; the caller now has to free() it.
 */
char * dyn_str_take(dyn_str* ds)
{
	char * const taken = ds->str;

	free(ds);
	return taken;
}
536 
/**
 * Append str to the dynamic string, growing its buffer when needed.
 * Running out of memory is fatal, as it is in xalloc().
 */
void dyn_strcat(dyn_str* ds, const char *str)
{
	size_t l = strlen(str);

	if (ds->end+l+1 >= ds->len)
	{
		/* Keep the old pointer until realloc() is known to have worked. */
		size_t newlen = 2 * ds->len + l;
		char *grown = realloc(ds->str, newlen);
		if (NULL == grown)
		{
			prt_error("Fatal Error: Ran out of space. (dyn_str)\n");
			abort();
		}
		ds->str = grown;
		ds->len = newlen;
	}

	/* The length is known already; l+1 includes the terminating NUL. */
	memcpy(ds->str+ds->end, str, l+1);
	ds->end += l;
}
548 
549 /// Trim away trailing whitespace.
dyn_trimback(dyn_str * ds)550 void dyn_trimback(dyn_str* ds)
551 {
552 	size_t tail = ds->end;
553 	while (0 < tail && ' ' == ds->str[--tail]) {}
554 
555 	ds->end = ++tail;
556 	ds->str[tail] = 0x0;
557 }
558 
/**
 * Read-only access to the string's character buffer. The pointer is
 * the buffer itself, not a copy.
 */
const char * dyn_str_value(dyn_str* s)
{
	const char *value = s->str;

	return value;
}
563 
/**
 * Current length of the string, as tracked in its "end" member.
 */
size_t dyn_strlen(dyn_str* s)
{
	const size_t length = s->end;

	return length;
}
568 
569 /* ======================================================== */
570 /* Locale routines */
571 
572 #ifdef HAVE_LOCALE_T
573 /**
574  * Create a locale object from the given locale string.
575  * @param locale Locale string, in the native OS format.
576  * @return Locale object for the given locale
577  * Note: It has to be freed by freelocale().
578  */
newlocale_LC_CTYPE(const char * locale)579 locale_t newlocale_LC_CTYPE(const char *locale)
580 {
581 	locale_t locobj;
582 #ifdef _WIN32
583 	locobj = _create_locale(LC_CTYPE, locale);
584 #else
585 	locobj = newlocale(LC_CTYPE_MASK, locale, (locale_t)0);
586 #endif /* _WIN32 */
587 	return locobj;
588 }
589 #endif /* HAVE_LOCALE_T */
590 
591 /**
592  * Check that the given locale known by the system.
593  * In case we don't have locale_t, actually set the locale
594  * in order to find out if it is fine. This side effect doesn't cause
595  * harm, as the locale would be set up to that value anyway shortly.
596  * @param locale Locale string
597  * @return True if known, false if unknown.
598  */
try_locale(const char * locale)599 bool try_locale(const char *locale)
600 {
601 #ifdef HAVE_LOCALE_T
602 		locale_t ltmp = newlocale_LC_CTYPE(locale);
603 		if ((locale_t)0 == ltmp) return false;
604 		freelocale(ltmp);
605 #else
606 		lgdebug(D_USER_FILES, "Debug: Setting program's locale \"%s\"", locale);
607 		if (NULL == setlocale(LC_CTYPE, locale))
608 		{
609 			lgdebug(D_USER_FILES, " failed!\n");
610 			return false;
611 		}
612 		lgdebug(D_USER_FILES, ".\n");
613 #endif /* HAVE_LOCALE_T */
614 
615 		return true;
616 }
617 
618 /**
619  * Ensure that the program's locale has a UTF-8 codeset.
620  */
set_utf8_program_locale(void)621 void set_utf8_program_locale(void)
622 {
623 #ifndef _WIN32
624 	/* The LG library doesn't use mbrtowc_l(), since it doesn't exist in
625 	 * the dynamic glibc (2.22). mbsrtowcs_l() could also be used, but for
626 	 * some reason it exists only in the static glibc.
627 	 * In order that mbrtowc() will work for any UTF-8 character, UTF-8
628 	 * codeset is ensured. */
629 	const char *codeset = nl_langinfo(CODESET);
630 	if (!strstr(codeset, "UTF") && !strstr(codeset, "utf"))
631 	{
632 		const char *locale = setlocale(LC_CTYPE, NULL);
633 		/* Avoid an initial spurious message. */
634 		if ((0 != strcmp(locale, "C")) && (0 != strcmp(locale, "POSIX")))
635 		{
636 			prt_error("Warning: Program locale \"%s\" (codeset %s) was not UTF-8; "
637 						 "force-setting to en_US.UTF-8\n", locale, codeset);
638 		}
639 		locale = setlocale(LC_CTYPE, "en_US.UTF-8");
640 		if (NULL == locale)
641 		{
642 			prt_error("Warning: Program locale en_US.UTF-8 could not be set; "
643 			          "force-setting to C.UTF-8\n");
644 			locale = setlocale(LC_CTYPE, "C.UTF-8");
645 			if (NULL == locale)
646 			{
647 				prt_error("Warning: Could not set a UTF-8 program locale; "
648 				          "program may malfunction\n");
649 			}
650 		}
651 	}
652 #endif /* !_WIN32 */
653 }
654 
655 #ifdef _WIN32
656 static char *
win32_getlocale(void)657 win32_getlocale (void)
658 {
659 	char lbuf[10];
660 	char locale[32];
661 
662 	LCID lcid = GetThreadLocale();
663 
664 	if (0 >= GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, lbuf, sizeof(lbuf)))
665 	{
666 		prt_error("Error: GetLocaleInfoA LOCALE_SENGLISHLANGUAGENAME LCID=%d: "
667 		          "Error %d\n", (int)lcid, (int)GetLastError());
668 		return NULL;
669 	}
670 	strcpy(locale, lbuf);
671 	strcat(locale, "-");
672 
673 	if (0 >= GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, lbuf, sizeof(lbuf)))
674 	{
675 		prt_error("Error: GetLocaleInfoA LOCALE_SISO3166CTRYNAME LCID=%d: "
676 		          "Error %d\n", (int)lcid, (int)GetLastError());
677 		return NULL;
678 	}
679 	strcat(locale, lbuf);
680 
681 	return strdup(locale);
682 }
683 #endif /* _WIN32 */
684 
get_default_locale(void)685 char * get_default_locale(void)
686 {
687 	const char *lc_vars[] = {"LC_ALL", "LC_CTYPE", "LANG", NULL};
688 	char *ev;
689 	const char **evname;
690 	char *locale = NULL;
691 
692 	for(evname = lc_vars; NULL != *evname; evname++)
693 	{
694 		ev = getenv(*evname);
695 		if ((NULL != ev) && ('\0' != ev[0])) break;
696 	}
697 	if (NULL != *evname)
698 	{
699 		locale = ev;
700 		lgdebug(D_USER_FILES, "Debug: Environment locale \"%s=%s\"\n", *evname, ev);
701 #ifdef _WIN32
702 		/* If compiled with MSVC/MinGW, we still support running under Cygwin. */
703 		const char *ostype = getenv("OSTYPE");
704 		if ((NULL != ostype) && (0 == strcmp(ostype, "cygwin")))
705 		{
706 			/* Convert to Windows style locale */
707 			locale = strdupa(locale);
708 			locale[strcspn(locale, "_")] = '-';
709 			locale[strcspn(locale, ".@")] = '\0';
710 		}
711 #endif /* _WIN32 */
712 	}
713 	else
714 	{
715 		lgdebug(D_USER_FILES, "Debug: Environment locale not set\n");
716 #ifdef _WIN32
717 		locale = win32_getlocale();
718 		if (NULL == locale)
719 			lgdebug(D_USER_FILES, "Debug: Cannot find user default locale\n");
720 		else
721 			lgdebug(D_USER_FILES, "Debug: User default locale \"%s\"\n", locale);
722 		return locale; /* Already strdup'ed */
723 #endif /* _WIN32 */
724 	}
725 
726 	return safe_strdup(locale);
727 }
728 
729 #ifdef HAVE_LOCALE_T
730 static void free_C_LC_NUMERIC(void);
731 
get_C_LC_NUMERIC(void)732 static locale_t get_C_LC_NUMERIC(void)
733 {
734 	static locale_t locobj;
735 
736 	if ((locale_t)0 != locobj) return locobj;
737 
738 #ifdef _WIN32
739 	locobj = _create_locale(LC_NUMERIC, "C");
740 #else
741 	locobj = newlocale(LC_NUMERIC_MASK, "C", (locale_t)0);
742 #endif /* _WIN32 */
743 
744 	atexit(free_C_LC_NUMERIC);
745 
746 	return locobj;
747 }
748 
free_C_LC_NUMERIC(void)749 static void free_C_LC_NUMERIC(void)
750 {
751 	freelocale(get_C_LC_NUMERIC());
752 }
753 #endif /* HAVE_LOCALE_T */
754 
/* FIXME: Rewrite to directly convert scaled integer strings (only). */
/**
 * Convert a string to a float using "C" locale number syntax.
 *
 * @param s String to convert; it must consist of a number and nothing
 *          after it.
 * @param r Receives the value; it is left untouched on failure.
 * @return True on success; false if s holds no number at all (e.g. an
 *         empty string) or has trailing characters.
 */
bool strtodC(const char *s, float *r)
{
	char *err;

#ifdef HAVE_LOCALE_T
	double val = strtod_l(s, &err, get_C_LC_NUMERIC());
#else
	/* dictionary_setup_locale() invokes setlocale(LC_NUMERIC, "C") */
	double val = strtod(s, &err);
#endif /* HAVE_LOCALE_T */

	/* err == s means that nothing was converted. */
	if ((err == s) || ('\0' != *err)) return false; /* *r unaffected */

	*r = (float)val;
	return true;
}
772 
773 /* ============================================================= */
774 /* Alternatives utilities */
775 
/**
 * Count the entries of a NULL-terminated array of strings.
 * @param arr The array; may be NULL.
 * @return Number of entries before the terminating NULL (0 for a NULL
 *         array).
 */
size_t altlen(const char **arr)
{
	const char **p;

	if (NULL == arr) return 0;

	for (p = arr; NULL != *p; p++) {}

	return (size_t)(p - arr);
}
783 
784 /* ============================================================= */
785 
786 #ifdef __MINGW32__
/*
 * Since _USE_MINGW_ANSI_STDIO=1 is used in order to support C99 STDIO
 * including the %z formats, MinGW uses its own *printf() functions (and not
 * the Windows ones). However, its printf()/fprintf() functions cannot write
 * UTF-8 to the console (to files/pipes they write UTF-8 just fine).  It
 * turned out the problem is that they use the putchar() of Windows, which
 * fails to write UTF-8 only when the output goes to the console!  This
 * problem is not fixed even in Windows 10 and the latest MinGW in Cygwin
 * 2.5.2.
 *
 * The workaround implemented here is to reimplement the corresponding MinGW
 * internal functions, and use fputs() to write the result.
 *
 * (Reimplementing printf()/fprintf() this way didn't work even with the
 * compilation flag -fno-builtin .)
 */
803 
/**
 * Replacement for MinGW's internal vfprintf(): format into a temporary
 * buffer and write it out with a single fputs().
 *
 * @param stream Stream to write to.
 * @param fmt    printf-style format string.
 * @param vl     Arguments for fmt.
 * @return The number of characters written, or a negative value if
 *         formatting, allocation or writing fails.
 */
int __mingw_vfprintf (FILE * __restrict__ stream, const char * __restrict__ fmt,
                      va_list vl)
{
	va_list vl_size;
	char *buf;
	int n;

	/* A va_list can be walked only once, so the sizing pass works on
	 * a copy. */
	va_copy(vl_size, vl);
	n = vsnprintf(NULL, 0, fmt, vl_size);
	va_end(vl_size);
	if (0 > n) return n;

	buf = malloc((size_t)n + 1);
	if (NULL == buf) return -1;

	n = vsnprintf(buf, (size_t)n + 1, fmt, vl);
	if ((0 <= n) && (EOF == fputs(buf, stream)))
		n = -1;

	free(buf);
	return n;
}
821 
/**
 * Replacement for MinGW's internal vprintf(): same as
 * __mingw_vfprintf() on stdout.
 */
int __mingw_vprintf (const char * __restrict__ fmt, va_list vl)
{
	FILE * const out = stdout;

	return __mingw_vfprintf(out, fmt, vl);
}
826 #endif /* __MINGW32__ */
827 /* ============================================================= */
828