1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 /* strutils.c for gretl */
21 
22 #include "libgretl.h"
23 
24 #include <errno.h>
25 #include <time.h>
26 #include <glib.h>
27 
28 /**
29  * SECTION:strutils
30  * @short_description: miscellaneous string-handling utilities
31  * @title: Strings
32  * @include: libgretl.h
33  *
34  * Various functions for creating, testing and manipulating
35  * strings and arrays of strings.
36  */
37 
38 /**
39  * string_is_blank:
40  * @s: the string to examine.
41  *
42  * Returns: 1 if the string is NULL, of length zero, or contains
43  * nothing but space characters, otherwise returns 0.
44  **/
45 
string_is_blank(const char * s)46 int string_is_blank (const char *s)
47 {
48     int ret = 1;
49 
50     if (s != NULL) {
51 	while (*s) {
52 	    if (!isspace((unsigned char) *s) &&
53 		*s != '\r' && *s != CTRLZ) {
54 		ret = 0;
55 		break;
56 	    }
57 	    s++;
58 	}
59     }
60 
61     return ret;
62 }
63 
/* decimal-point character consulted by dot_atof(); starts out as 0 */
static int atof_point;

/* Record @c as the decimal-point character for dot_atof(). */
void set_atof_point (char c)
{
    atof_point = (int) c;
}
70 
71 /**
72  * dot_atof:
73  * @s: the string to convert.
74  *
75  * Returns: the double-precision numeric interpretation of @s,
76  * where the decimal point character is forced to be '.',
77  * regardless of the current locale.
78  **/
79 
double dot_atof (const char *s)
{
#ifdef ENABLE_NLS
    double val;
    int swap_locale;

    if (atof_point == 0) {
	/* not set yet: look up the current locale's decimal point */
	atof_point = *localeconv()->decimal_point;
    }

    /* only switch numeric locale when the decimal point is not '.' */
    swap_locale = (atof_point != '.');

    if (swap_locale) {
	gretl_push_c_numeric_locale();
    }
    val = atof(s);
    if (swap_locale) {
	gretl_pop_c_numeric_locale();
    }

    return val;
#else
    /* without NLS no locale handling is done */
    return atof(s);
#endif
}
104 
105 /**
106  * gretl_dotpos:
107  * @str: the string to examine.
108  *
 * Returns: the integer position of the last "." within @str,
 * or strlen(@str) in case no dot is found after the last
 * (backward or forward) slash; a dot at position 0 is not counted.
 * Returns 0 if @str is NULL or empty.
112  */
113 
int gretl_dotpos (const char *str)
{
    int len, k;

    if (str == NULL || *str == '\0') {
	return 0;
    }

    len = strlen(str);

    /* scan backwards; the byte at index 0 is never examined */
    for (k = len - 1; k > 0; k--) {
	char c = str[k];

	if (c == '.') {
	    return k;
	}
	if (c == '/' || c == '\\') {
	    /* reached a path separator before any dot */
	    break;
	}
    }

    return len;
}
132 
133 /**
134  * gretl_slashpos:
135  * @str: the string to examine.
136  *
 * Returns: the integer position of the last #SLASH within @str,
 * not counting one at position 0; strlen(@str) if no such #SLASH
 * is found; or 0 if @str is NULL or empty.
139  */
140 
gretl_slashpos(const char * str)141 int gretl_slashpos (const char *str)
142 {
143     int i, p = 0;
144 
145     if (str != NULL && *str != '\0') {
146 	p = strlen(str);
147 	for (i=p-1; i>0; i--) {
148 #ifdef WIN32
149 	    if (str[i] == '\\' || str[i] == '/') {
150 #else
151 	    if (str[i] == SLASH) {
152 #endif
153 		p = i;
154 		break;
155 	    }
156 	}
157     }
158 
159     return p;
160 }
161 
162 /**
163  * strrslash:
164  * @s: the string to examine.
165  *
 * Returns: a pointer to the last occurrence of 'SLASH'
 * in the string @s, making allowance for the fact that
 * on MS Windows this may be either a backslash or a
 * forward slash, or NULL if no 'SLASH' is found.
170  */
171 
char *strrslash (const char *s)
{
#ifdef WIN32
    char *bslash = strrchr(s, '\\');
    char *fslash = strrchr(s, '/');

    /* if only one kind is present (or neither), that settles it */
    if (bslash == NULL) {
	return fslash;
    }
    if (fslash == NULL) {
	return bslash;
    }

    /* both kinds present: whichever comes later in the string */
    return (fslash > bslash)? fslash : bslash;
#else
    return strrchr(s, '/');
#endif
}
191 
192 /**
193  * gretl_delchar:
194  * @c: the character to delete.
195  * @str: the string from which to delete @c.
196  *
197  * Deletes all instances of @c within @str.
198  *
199  * Returns: the possibly modified string.
200  */
201 
char *gretl_delchar (int c, char *str)
{
    const char *rd;   /* read position */
    char *wr = str;   /* write position, never ahead of rd */

    for (rd = str; *rd != '\0'; rd++) {
	if (*rd != c) {
	    *wr++ = *rd;
	}
    }

    *wr = '\0';

    return str;
}
216 
217 /**
218  * gretl_delete:
219  * @str: the string to process.
220  * @idx: the starting point for deleting characters.
221  * @count: the number of characters to delete.
222  *
223  * Deletes @count characters from @str, starting at position @idx.
224  *
225  * Returns: the modified string.
226  */
227 
char *gretl_delete (char *str, int idx, int count)
{
    size_t n, start, ndel;

    /* nothing sensible to do for a NULL string, a negative
       start or a non-positive count */
    if (str == NULL || idx < 0 || count <= 0) {
	return str;
    }

    n = strlen(str);
    start = (size_t) idx;
    ndel = (size_t) count;

    /* the range to delete must lie wholly inside the string;
       otherwise @str is left untouched (this also avoids the
       unsigned wrap-around of n - count when count > n)
    */
    if (start > n || ndel > n - start) {
	return str;
    }

    /* slide the tail down, terminating NUL included */
    memmove(str + start, str + start + ndel, n - start - ndel + 1);

    return str;
}
238 
239 /**
240  * gretl_unquote:
241  * @str: the string to process.
242  * @err: location to receive error code.
243  *
244  * If @str begins with the ASCII double-quote character, checks
245  * that the last character is also a double-quote, and in that
246  * case trims the quotes from both ends. If the first character
247  * is a double quote but the last is not, flags an error. If
248  * the string is not quoted at all, returns the original
249  * string.
250  *
251  * Returns: the input string, possibly modified in place.
252  */
253 
254 char *gretl_unquote (char *str, int *err)
255 {
256     *err = 0;
257 
258     if (*str == '"') {
259 	int n = strlen(str);
260 
261 	if (n > 1) {
262 	    if (str[n-1] == '"') {
263 		str[n-1] = '\0';
264 	    } else {
265 		*err = E_PARSE;
266 	    }
267 	} else {
268 	    *err = E_PARSE;
269 	}
270 
271 	if (!*err) {
272 	    shift_string_left(str, 1);
273 	}
274     }
275 
276     return str;
277 }
278 
279 /**
280  * gretl_charpos:
281  * @c: the character to look for.
282  * @s: the string to examine.
283  *
284  * Returns: the first position of @c in @s, or -1 if @c is not
285  * found.
286  */
287 
int gretl_charpos (char c, const char *s)
{
    int pos;

    /* the terminating NUL itself is never matched */
    for (pos = 0; s[pos] != '\0'; pos++) {
	if (s[pos] == c) {
	    return pos;
	}
    }

    return -1;
}
301 
302 /**
303  * gretl_charsub:
304  * @str: the string to operate on.
305  * @find: the character to replace.
306  * @repl: the replacement character.
307  *
308  * Replaces all occurrences of @find with @repl in @str.
309  *
310  * Returns: the (possibly modified) string.
311  */
312 
char *gretl_charsub (char *str, char find, char repl)
{
    size_t k;

    for (k = 0; str[k] != '\0'; k++) {
	if (str[k] == find) {
	    str[k] = repl;
	}
    }

    return str;
}
326 
327 /**
328  * comma_separate_numbers:
329  * @s: the string to operate on.
330  *
331  * Given a string which contains two or more numbers
332  * separated by spaces and/or commas, revise the
333  * string to ensure that all the numbers are comma-separated.
334  *
335  * Returns: the (possibly modified) string.
336  */
337 
char *comma_separate_numbers (char *s)
{
    const char *numstart = "+-.0123456789";
    char *cur = s;

    while (*cur != '\0') {
	/* length of the run of spaces and/or commas at @cur */
	size_t gap = strspn(cur, " ,");

	if (gap == 0) {
	    cur++;
	    continue;
	}

	/* a gap that precedes the start of a number and holds no
	   comma gets one written over its first byte */
	if (cur[gap] != '\0' &&
	    strchr(numstart, cur[gap]) != NULL &&
	    memchr(cur, ',', gap) == NULL) {
	    *cur = ',';
	}

	cur += gap;
    }

    return s;
}
362 
363 /**
364  * has_suffix:
365  * @str: the string to check.
366  * @sfx: the suffix to check for, including the leading '.'
367  *
368  * Returns: 1 if @str ends with @sfx (on a case-insensitive
369  * comparison), 0 otherwise.
370  */
371 
int has_suffix (const char *str, const char *sfx)
{
    const char *tail;
    size_t n, m, i;

    /* validate both arguments before touching either of them */
    if (str == NULL || sfx == NULL || *sfx == '\0') {
	return 0;
    }

    n = strlen(str);
    m = strlen(sfx);

    if (m > n) {
	return 0;
    }

    /* compare the last m bytes of @str with @sfx, ignoring case
       on both sides; this covers simple suffixes (".gdt") and
       compound ones (".csv.gz") alike
    */
    tail = str + (n - m);

    for (i = 0; i < m; i++) {
	/* ctype functions require an unsigned char value */
	int a = tolower((unsigned char) tail[i]);
	int b = tolower((unsigned char) sfx[i]);

	if (a != b) {
	    return 0;
	}
    }

    return 1;
}
400 
401 /**
402  * has_native_data_suffix:
403  * @fname: the filename to check.
404  *
405  * Returns: 1 if @fname ends with a suffix indicating it is a
406  * native gretl data file, 0 otherwise.
407  */
408 
int has_native_data_suffix (const char *fname)
{
    /* accepted extensions: all-lower or all-upper case only */
    static const char *const native[] = {
	"gdt", "gdtb", "GDT", "GDTB"
    };
    const char *dot;
    size_t k;

    if (fname == NULL) {
	return 0;
    }

    dot = strrchr(fname, '.');
    if (dot == NULL) {
	return 0;
    }

    for (k = 0; k < sizeof native / sizeof native[0]; k++) {
	if (strcmp(dot + 1, native[k]) == 0) {
	    return 1;
	}
    }

    return 0;
}
425 
426 /**
427  * numeric_string:
428  * @str: the string to examine.
429  *
430  * Returns: 1 if the given @str is numeric, otherwise 0.
431  */
432 
int numeric_string (const char *str)
{
    char *endp;

    if (str == NULL || *str == '\0') {
	return 0;
    }

    if (strlen(str) == 3) {
	char lc[4];

	strcpy(lc, str);
	gretl_lower(lc);
	/* "inf" and "nan" (in any case) could be variable names:
	   they are not accepted as numbers here */
	if (strcmp(lc, "inf") == 0 || strcmp(lc, "nan") == 0) {
	    return 0;
	}
    }

    /* parse with the numeric locale switched around the call */
    gretl_push_c_numeric_locale();
    errno = 0;
    strtod(str, &endp);
    gretl_pop_c_numeric_locale();

    /* numeric only if all of @str was consumed and no range
       error is flagged */
    return (*endp == '\0' && errno != ERANGE);
}
464 
465 /**
466  * integer_string:
467  * @str: the string to examine.
468  *
469  * Returns: 1 if the given @str represents an integer, otherwise 0.
470  */
471 
int integer_string (const char *str)
{
    char *endp;

    if (str == NULL || *str == '\0') {
	return 0;
    }

    errno = 0;
    strtol(str, &endp, 10);

    /* the whole string must have been consumed, with no
       error reported via errno */
    return (*endp == '\0' && errno == 0);
}
489 
490 /**
491  * ends_with_backslash:
492  * @s: the string to examine.
493  *
494  * Returns: 1 if the last non-space character in @s is a backslash,
495  * otherwise 0.
496  */
497 
int ends_with_backslash (const char *s)
{
    int k = strlen(s);

    /* step back over any trailing white space */
    while (k > 0 && isspace((unsigned char) s[k-1])) {
	k--;
    }

    return (k > 0 && s[k-1] == '\\');
}
514 
515 /**
516  * gretl_lower:
517  * @str: the string to transform.
518  *
519  * Converts any upper case characters in @str to lower case.
520  *
521  * Returns: the possibly modified string.
522  */
523 
char *gretl_lower (char *str)
{
    char *p;

    for (p = str; *p != '\0'; p++) {
	/* ctype functions require a value representable as
	   unsigned char: convert once and use it for both calls */
	unsigned char uc = (unsigned char) *p;

	if (isupper(uc)) {
	    *p = (char) tolower(uc);
	}
    }

    return str;
}
537 
538 /**
539  * gretl_strdup:
540  * @src: the string to duplicate.
541  *
542  * Returns: an allocated copy of @src, or NULL on error.
543  */
544 
char *gretl_strdup (const char *src)
{
    size_t nbytes;
    char *copy;

    if (src == NULL) {
	return NULL;
    }

    /* room for the characters plus the terminating NUL */
    nbytes = strlen(src) + 1;

    copy = calloc(nbytes, 1);
    if (copy == NULL) {
	return NULL;
    }

    return memcpy(copy, src, nbytes);
}
560 
561 /**
562  * gretl_strndup:
563  * @src: the string to be copied.
564  * @n: the maximum number of characters to copy.
565  *
566  * Returns: an allocated copy of at most @n characters from
567  * @src, or NULL on error.
568  */
569 
char *gretl_strndup (const char *src, size_t n)
{
    char *targ;
    size_t len;

    if (src == NULL) {
	return NULL;
    }

    /* bounded length scan: never look beyond the first @n bytes
       of @src, so the cost depends on @n rather than on the full
       length of @src, and an unterminated buffer of @n bytes is
       handled safely
    */
    for (len = 0; len < n && src[len] != '\0'; len++) {
	;
    }

    targ = malloc(len + 1);
    if (targ != NULL) {
	memcpy(targ, src, len);
	targ[len] = '\0';
    }

    return targ;
}
590 
591 /**
592  * gretl_strdup_printf:
593  * @format: as in printf().
594  * @Varargs: arguments to be printed.
595  *
596  * Print the arguments according to @format.
597  *
598  * Returns: allocated result of the printing, or NULL on failure.
599  */
600 
char *gretl_strdup_printf (const char *format, ...)
{
    va_list args;
    char *buf = NULL;
    int len;

#ifdef HAVE_VASPRINTF
    va_start(args, format);
    len = vasprintf(&buf, format, args);
    va_end(args);
    if (len < 0) {
	/* on failure, don't trust whatever is in buf */
	buf = NULL;
    }
#else
    int bsize = 2048;

    /* first attempt: a fixed-size buffer, enough in most cases */
    buf = malloc(bsize);
    if (buf == NULL) {
	return NULL;
    }

    va_start(args, format);
    len = vsnprintf(buf, bsize, format, args);
    va_end(args);

    if (len < 0) {
	/* formatting error: report failure, as in the
	   vasprintf variant above */
	free(buf);
	return NULL;
    }

    if (len >= bsize) {
	/* the output did not fit: len is the length actually
	   required, so enlarge the buffer and print again
	   rather than returning a truncated string
	*/
	char *tmp = realloc(buf, (size_t) len + 1);

	if (tmp == NULL) {
	    free(buf);
	    return NULL;
	}
	buf = tmp;

	va_start(args, format);
	len = vsnprintf(buf, (size_t) len + 1, format, args);
	va_end(args);

	if (len < 0) {
	    free(buf);
	    buf = NULL;
	}
    }
#endif

    return buf;
}
636 
637 /**
638  * gretl_str_expand:
639  * @orig: pointer to the base string.
640  * @add: the string to be added.
641  * @sep: string to be interpolated, or NULL.
642  *
643  * Creates a newly allocated string built by concatenating
644  * @orig and @add, with @sep interpolated unless @sep is
645  * NULL, and replaces the content of @orig with the new string.
646  * As a special case, if @orig is NULL, or if the content of
647  * @orig is NULL, we just duplicate @add.
648  *
649  * Returns: the reallocated string, or NULL on failure.  In case
650  * of failure the content of @orig is freed, if @orig is not NULL,
651  * to avoid memory leakage.
652  */
653 
char *gretl_str_expand (char **orig, const char *add, const char *sep)
{
    size_t len_orig, len_sep, len_add;
    char *grown;

    if (add == NULL) {
	return NULL;
    }

    if (orig == NULL || *orig == NULL) {
	/* nothing to append to: just copy @add */
	return gretl_strdup(add);
    }

    len_orig = strlen(*orig);
    len_sep = (sep != NULL)? strlen(sep) : 0;
    len_add = strlen(add);

    grown = realloc(*orig, len_orig + len_sep + len_add + 1);
    if (grown == NULL) {
	/* realloc leaves the old block intact on failure:
	   release it here so that it is not leaked */
	free(*orig);
	*orig = NULL;
	return NULL;
    }

    /* append the separator (if any), then @add with its NUL */
    if (len_sep > 0) {
	memcpy(grown + len_orig, sep, len_sep);
    }
    memcpy(grown + len_orig + len_sep, add, len_add + 1);

    *orig = grown;

    return grown;
}
688 
689 static int is_word_char (const char *s, int i, gretlopt opt)
690 {
691     unsigned char c = *s;
692 
693     if (isalnum(c) || c == '_') {
694 	return 1;
695     } else if ((opt & OPT_D) && c == '.') {
696 	return 1;
697     } else if (opt & OPT_U) {
698 	/* allow Greeks */
699 	if ((c == 0xCE || c == 0xCF) && *(s+1)) {
700 	    return 1;
701 	} else if (i > 0) {
702 	    unsigned char prev = *(s-1);
703 
704 	    return (prev == 0xCE && c >= 0x91 && c <= 0xBF) ||
705 		(prev == 0xCF && c >= 0x80 && c <= 0x89);
706 	}
707     }
708 
709     return 0;
710 }
711 
712 /**
713  * gretl_word_strdup:
714  * @src: the source string.
715  * @ptr: location to receive end of word pointer, or NULL.
716  * @opt: can include OPT_S for "strict" operation: in this
717  * case an error is flagged if @src contains any characters
718  * other than 'word' characters (see below), comma and space.
719  * Also may include OPT_D to allow dot as a "word" character,
720  * OPT_U to accept UTF-8 Greek letters.
721  * @err: location to receive error code.
722  *
723  * Copies the first 'word' found in @src, where a word
724  * is defined as consisting of alphanumeric characters
725  * and the underscore.  If @ptr is not NULL, on exit it
726  * points at the next position in @src after the copied
727  * word.
728  *
729  * Returns: the allocated word or NULL in case no word is
730  * found, or on error.
731  */
732 
/* Extract a copy of the first 'word' in @src, as judged by
   is_word_char() under the flags in @opt. Note that @err is
   written only when something goes wrong (E_PARSE in strict
   mode, E_ALLOC on allocation failure); it is not zeroed here.
*/

char *gretl_word_strdup (const char *src, const char **ptr,
			 gretlopt opt, int *err)
{
    char *targ = NULL;

    if (src == NULL) {
	/* no input: no word, and no end-pointer either */
	if (ptr != NULL) {
	    *ptr = NULL;
	}
    } else if (*src == '\0') {
	/* empty input: the end-pointer stays at the terminator */
	if (ptr != NULL) {
	    *ptr = src;
	}
    } else {
	const char *p;
	int len = 0;
	/* offset of @src from its starting point, handed to
	   is_word_char() so it knows whether a preceding byte
	   may be examined */
	int i = 0;

	if (opt & OPT_S) {
	    /* strict: check for any junk */
	    while (*src && (*src == ' ' || *src == ',')) {
		src++; i++;
	    }
	    /* offset 0 is passed here, so the byte before @src
	       is not consulted for the first character */
	    if (*src && !is_word_char(src, 0, opt)) {
		gretl_errmsg_sprintf(_("Unexpected symbol '%c'"), *src);
		*err = E_PARSE;
		/* note: @ptr is left unset on this path */
		return NULL;
	    }
	} else {
	    /* just skip to first 'word char' */
	    while (*src && !is_word_char(src, i, opt)) {
		src++; i++;
	    }
	}

	/* set to start of 'word' */
	p = src;

	/* measure the word; @len counts bytes, not characters */
	while (is_word_char(src, i, opt)) {
	    len++;
	    src++;
	    i++;
	}

	if (opt & OPT_S) {
	    /* strict: the word may be followed only by the end
	       of the string, a space or a comma */
	    if (*src != '\0' && *src != ' ' && *src != ',') {
		gretl_errmsg_sprintf(_("Unexpected symbol '%c'"), *src);
		*err = E_PARSE;
		return NULL;
	    }
	}

	if (ptr != NULL) {
	    /* first byte after the word */
	    *ptr = src;
	}

	if (len > 0) {
	    targ = gretl_strndup(p, len);
	    if (targ == NULL) {
		*err = E_ALLOC;
	    }
	}
    }

    return targ;
}
799 
800 /**
801  * gretl_quoted_string_strdup:
802  * @s: the source string.
803  * @ptr: location to receive end pointer, or NULL.
804  *
805  * If @s starts with a quote (double or single), return a copy of
806  * the portion of @s that is enclosed in quotes.  That is,
807  * from @s + 1 up to but not including the next matching quote.
808  * If @ptr is not NULL, on output it receives a pointer to
809  * the next byte in @s after the closing quote.
810  *
811  * Returns: the allocated string or NULL on failure.
812  */
813 
char *gretl_quoted_string_strdup (const char *s, const char **ptr)
{
    const char *start, *end;
    char quote;

    if (s == NULL || (*s != '"' && *s != '\'')) {
	/* @s does not open with a quote character */
	if (ptr != NULL) {
	    *ptr = NULL;
	}
	return NULL;
    }

    quote = *s;
    start = s + 1;

    /* look for the matching quote, skipping any instance that
       is directly preceded by a backslash */
    for (end = start; *end != '\0'; end++) {
	if (*end == quote && end[-1] != '\\') {
	    break;
	}
    }

    if (*end == '\0') {
	/* ran off the end: no closing quote */
	if (ptr != NULL) {
	    *ptr = NULL;
	}
	return NULL;
    }

    if (ptr != NULL) {
	*ptr = end + 1;
    }

    return gretl_strndup(start, end - start);
}
851 
852 /* variant of gretl_string_split() that respects
853    empty fields, including them in the output array
854 */
855 
856 static char **string_split_2 (const char *s, int *n,
857 			      const char *sep)
858 {
859     char **S = NULL;
860     gchar **tmp;
861     gchar *mysep;
862     int i, m = 0;
863 
864     *n = 0;
865 
866     if (sep[0] == '\t' && sep[1] == '\0') {
867 	mysep = g_strdup(sep);
868     } else {
869 	mysep = g_strstrip(g_strdup(sep));
870     }
871 
872     tmp = g_strsplit(s, mysep, -1);
873     if (tmp != NULL) {
874 	for (i=0; tmp[i]; i++) {
875 	    m++;
876 	}
877 	if (m > 0) {
878 	    S = strings_array_new(m);
879 	    if (S != NULL) {
880 		for (i=0; i<m; i++) {
881 		    S[i] = gretl_strdup(g_strstrip(tmp[i]));
882 		}
883 	    }
884 	}
885 	g_strfreev(tmp);
886     }
887 
888     g_free(mysep);
889     *n = m;
890 
891     return S;
892 }
893 
894 /* Re. the separator given to gretl_string_split():
895    if it contains anything other than whitespace
896    characters we'll respect empty fields
897 */
898 
static int respect_empty_fields (const char *s)
{
    const char *p;

    if (s[0] == '\t' && s[1] == '\0') {
	/* treat single tab as "true" separator string */
	return 1;
    }

    for (p = s; *p != '\0'; p++) {
	/* cast needed: isspace() requires a value representable
	   as unsigned char, and plain char may be negative */
	if (!isspace((unsigned char) *p)) {
	    return 1;
	}
    }

    return 0;
}
914 
915 /**
916  * gretl_string_split:
917  * @s: the source string.
918  * @n: location to receive the number of substrings.
919  * @sep: string containing the character(s) to count as
920  * field separators, or NULL. If @sep is NULL only the
921  * space character counts.
922  *
923  * Parses @s into a set of zero or more substrings and
 * creates an array of those substrings. On successful exit
925  * @n holds the number of substrings.
926  *
927  * Returns: the allocated array or NULL in case of failure.
928  */
929 
char **gretl_string_split (const char *s, int *n,
			   const char *sep)
{
    char **S;
    int nf, j;

    *n = 0;

    if (s == NULL) {
	return NULL;
    }

    if (sep == NULL) {
	sep = " ";
    } else if (respect_empty_fields(sep)) {
	/* hand off to the variant that keeps empty fields */
	return string_split_2(s, n, sep);
    }

    nf = count_fields(s, sep);
    if (nf == 0) {
	return NULL;
    }

    S = strings_array_new(nf);
    if (S == NULL) {
	return NULL;
    }

    for (j = 0; j < nf; j++) {
	size_t wlen;
	char *w;

	/* skip separators, then measure and copy the field */
	s += strspn(s, sep);
	wlen = strcspn(s, sep);
	w = gretl_strndup(s, wlen);
	if (w == NULL) {
	    strings_array_free(S, nf);
	    return NULL;
	}
	S[j] = w;
	s += wlen;
    }

    *n = nf;

    return S;
}
974 
975 /**
976  * gretl_string_split_lines:
977  * @s: the source string.
978  * @n: location to receive the number of substrings.
979  *
980  * Parses @s into a set of zero or more substrings, one per
981  * complete line of @s, and creates an array of those substrings.
 * On successful exit @n holds the number of substrings.
983  *
984  * Returns: the allocated array or NULL in case of failure.
985  */
986 
char **gretl_string_split_lines (const char *s, int *n)
{
    const char *p;
    char **S;
    int i, m;

    *n = 0;

    /* the number of complete lines is the number of newlines;
       count_lines() gives 0 for NULL input, so a NULL @s is
       handled here too */
    m = count_lines(s);
    if (m == 0) {
	return NULL;
    }

    S = strings_array_new(m);
    if (S == NULL) {
	return NULL;
    }

    p = s;

    for (i = 0; i < m && *p != '\0'; i++) {
	/* the line runs up to the next CR or LF */
	size_t len = strcspn(p, "\r\n");

	S[i] = gretl_strndup(p, len);
	if (S[i] == NULL) {
	    strings_array_free(S, m);
	    return NULL;
	}
	gretl_strstrip(S[i]);

	/* step past the line and its terminator (LF or CR+LF) */
	p += len;
	if (*p == '\r') p++;
	if (*p == '\n') p++;
    }

    *n = m;

    return S;
}
1041 
1042 /**
1043  * gretl_string_split_quoted:
1044  * @s: the source string.
1045  * @n: location to receive the number of substrings.
1046  * @sep: string containing the character(s) to count as
1047  * field separators, or NULL. If @sep is NULL only space,
1048  * tab and newline count.
1049  * @err: location to receive error code.
1050  *
1051  * Similar to gretl_string_split(), except that this variant
1052  * allows for the presence of double-quoted substrings
1053  * which may contain spaces. The quotes are removed in the
1054  * members of the returned array.
1055  *
1056  * Returns: allocated array of substrings or NULL in case of failure.
1057  */
1058 
/* Works in two passes over @s: the first counts the substrings
   (and checks that every opening double-quote has a partner),
   the second copies them into a newly allocated array.
*/

char **gretl_string_split_quoted (const char *s, int *n,
				  const char *sep, int *err)
{
    const char *ignore;
    const char *q, *p = s;
    int i, len, m = 0;
    int grabit, quoted;
    char *substr;
    char **S;

    *err = 0;
    /* the separator set: default is space, tab and newline */
    ignore = sep != NULL ? sep : " \t\n";

    *n = 0;

    /* pass 1: count the substrings, into @m */
    while (*p) {
	p += strspn(p, ignore);
	if (*p == '"') {
	    /* quoted substring */
	    m++;
	    q = strchr(p + 1, '"');
	    if (q == NULL) {
		/* unmatched opening quote */
		*err = E_PARSE;
		return NULL;
	    }
	    /* leave p on the closing quote */
	    p = q;
	} else {
	    len = strcspn(p, ignore);
	    if (len > 0) {
		/* unquoted substring */
		m++;
		/* leave p on the last byte of the substring */
		p += len - 1;
	    }
	}
	if (*p == '\0') {
	    break;
	}
	/* step past the closing quote or last byte */
	p++;
    }

    if (*err || m == 0) {
	return NULL;
    }

    S = strings_array_new(m);
    if (S == NULL) {
	*err = E_ALLOC;
	return NULL;
    }

    p = s;
    i = 0;

    /* pass 2: copy out the substrings */
    while (*p && i < m) {
	grabit = quoted = 0;
	p += strspn(p, ignore);
	if (*p == '"') {
	    /* copy what lies between the quotes, which may be
	       nothing at all */
	    grabit = quoted = 1;
	    p++;
	    len = strcspn(p, "\"");
	} else {
	    len = strcspn(p, ignore);
	    grabit = (len > 0);
	}
	if (grabit) {
	    substr = gretl_strndup(p, len);
	    if (substr == NULL) {
		*err = E_ALLOC;
		strings_array_free(S, m);
		return NULL;
	    }
	    S[i++] = substr;
	    /* @quoted adds 1 to step over the closing quote */
	    p += len + quoted;
	}
    }

    *n = m;

    return S;
}
1139 
1140 /**
1141  * gretl_trunc:
1142  * @str: the string to truncate.
1143  * @n: the desired length of the truncated string.
1144  *
1145  * Truncates the given @str to the specified length.
1146  *
1147  * Returns: the possibly truncated string.
1148  */
1149 
char *gretl_trunc (char *str, size_t n)
{
    size_t len = strlen(str);

    /* only ever shorten: a string of length <= n is left alone */
    if (len > n) {
	str[n] = '\0';
    }

    return str;
}
1158 
/* the ASCII characters that may appear in an identifier */
static const char *name_ok =
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789_";

/**
 * is_greek_letter:
 * @s: the string to test.
 *
 * Checks for the case where @s starts with a Greek letter
 * in two-byte UTF-8 form, followed by a nul byte or by a
 * printable ASCII character that cannot occur in a gretl
 * identifier.
 *
 * Returns: 1 if the case is met, otherwise 0.
 */

int is_greek_letter (const char *s)
{
    unsigned char b0 = s[0];
    /* the second byte is read only if the first is not NUL */
    unsigned char b1 = (b0 != '\0')? s[1] : 0;
    int greek;
    char after;

    greek = (b0 == 0xCE && b1 >= 0x91 && b1 <= 0xBF) ||
	(b0 == 0xCF && b1 >= 0x80 && b1 <= 0x89);

    if (!greek) {
	return 0;
    }

    /* what follows the two-byte letter? */
    after = s[2];

    if (after == '\0') {
	return 1;
    }

    return (after >= 32 && after <= 126 &&
	    strchr(name_ok, after) == NULL);
}
1198 
1199 /**
1200  * gretl_namechar_spn:
1201  * @s: the string to examine.
1202  *
1203  * Returns: the length of the initial segment of @s which
1204  * consists of characters that are valid in a gretl
1205  * variable or object name, namely a-z, A-Z, 0-9 and _,
1206  * starting with a letter, up to a maximum of %VNAMELEN - 1.
1207  */
1208 
1209 int gretl_namechar_spn (const char *s)
1210 {
1211     int ret = 0;
1212 
1213     if (isalpha(*s)) {
1214 	ret = strspn(s, name_ok);
1215     }
1216 
1217     if (ret == 0 && is_greek_letter(s)) {
1218 	ret = 2;
1219     }
1220 
1221     return ret;
1222 }
1223 
1224 /**
1225  * double_quote_position:
1226  * @s: the source string.
1227  *
1228  * Returns: the 0-based index of the position of the next
1229  * unescaped double-quote character in @s, or -1 if no
1230  * such character is found.
1231  */
1232 
int double_quote_position (const char *s)
{
    int pos;
    int nbs = 0; /* length of the current run of backslashes */

    for (pos = 0; s[pos] != '\0'; pos++) {
	if (s[pos] == '\\') {
	    nbs++;
	    continue;
	}
	if (s[pos] == '"' && nbs % 2 == 0) {
	    /* preceded by an even number of backslashes (possibly
	       zero): this double-quote is not escaped */
	    return pos;
	}
	nbs = 0;
    }

    return -1;
}
1257 
1258 /**
1259  * count_fields:
1260  * @s: the string to process.
1261  * @sep: string containing the character(s) to count as
1262  * field separators, or NULL. If @sep is NULL only the
1263  * space character counts.
1264  *
1265  * Returns: the number of fields in @s.
1266  */
1267 
int count_fields (const char *s, const char *sep)
{
    int nf = 0;

    if (s == NULL) {
	return 0;
    }

    if (sep == NULL) {
	sep = " ";
    }

    while (*s != '\0') {
	/* step past separator(s) */
	s += strspn(s, sep);
	if (*s == '\0') {
	    break;
	}
	/* at the start of a field: count it and skip over it */
	nf++;
	s += strcspn(s, sep);
    }

    return nf;
}
1302 
1303 /**
1304  * count_lines:
1305  * @s: the string to process.
1306  *
1307  * Returns: the number of complete lines (lines ending
1308  * with the newline character) in @s.
1309  */
1310 
int count_lines (const char *s)
{
    const char *nl;
    int count = 0;

    if (s == NULL) {
	return 0;
    }

    /* hop from one newline to the next */
    for (nl = strchr(s, '\n'); nl != NULL; nl = strchr(nl + 1, '\n')) {
	count++;
    }

    return count;
}
1326 
1327 /**
1328  * shift_string_left:
1329  * @str: the string to process.
1330  * @move: the number of places to shift.
1331  *
1332  * Shifts the content of @str left by @move places, dropping
1333  * leading bytes as needed.
1334  *
1335  * Returns: the modified string.
1336  */
1337 
char *shift_string_left (char *str, size_t move)
{
    size_t len = strlen(str);

    if (move < len) {
	/* slide the tail down, terminating NUL included */
	memmove(str, str + move, len - move + 1);
    } else {
	/* shifting by the full length or more leaves nothing */
	str[0] = '\0';
    }

    return str;
}
1351 
1352 /**
1353  * gretl_strstrip:
1354  * @str: the string to process.
1355  *
1356  * Removes leading and trailing white space from a string.
1357  *
1358  * Returns: the possibly modified string.
1359  */
1360 
char *gretl_strstrip (char *str)
{
    /* leading: only spaces and tabs are dropped */
    size_t lead = strspn(str, " \t");
    size_t len = strlen(str + lead);

    /* trailing: any white space (which includes '\r') is
       dropped; the cast is needed since isspace() requires a
       value representable as unsigned char */
    while (len > 0 && isspace((unsigned char) str[lead + len - 1])) {
	len--;
    }

    /* move what's left to the front and terminate it */
    if (lead > 0) {
	memmove(str, str + lead, len);
    }
    str[len] = '\0';

    return str;
}
1381 
1382 /**
1383  * gretl_strstrip_copy:
 * @str: the string to process.
 * @err: location to receive error code.
1385  *
1386  * Returns: a copy of @str, from which both leading and
1387  * trailing white space have been removed.
1388  */
1389 
1390 char *gretl_strstrip_copy (const char *str, int *err)
1391 {
1392     char *ret = NULL;
1393     int i, n;
1394 
1395     if (str == NULL) {
1396 	*err = E_INVARG;
1397 	return NULL;
1398     }
1399 
1400     while (isspace(*str)) {
1401 	str++;
1402     }
1403 
1404     n = strlen(str);
1405 
1406     for (i=n-1; i>=0; i--) {
1407 	if (isspace(str[i]) || str[i] == '\r') {
1408 	    n--;
1409 	} else {
1410 	    break;
1411 	}
1412     }
1413 
1414     ret = gretl_strndup(str, n);
1415     if (ret == NULL) {
1416 	*err = E_ALLOC;
1417     }
1418 
1419     return ret;
1420 }
1421 
/**
 * switch_ext:
 * @targ: the target or output string (must be pre-allocated,
 * with room for the result).
 * @src: the source or input string.
 * @ext: the extension or suffix to attach, without leading dot.
 *
 * For processing filenames: writes to @targ the portion of @src
 * that precedes the position reported by gretl_dotpos(), followed
 * by a dot and @ext. @targ and @src may be the same string, in
 * which case the extension is replaced in place.
 *
 * Returns: the output string, @targ.
 */

char *switch_ext (char *targ, const char *src, const char *ext)
{
    int dot = gretl_dotpos(src);

    if (targ != src) {
	/* bring across at most @dot bytes of @src */
	targ[0] = '\0';
	strncat(targ, src, dot);
    }

    /* plant the dot and terminate, ready for the new suffix */
    targ[dot] = '.';
    targ[dot + 1] = '\0';

    return strcat(targ, ext);
}
1449 
/**
 * switch_ext_in_place:
 * @fname: filename to modify; must have sufficient space to
 * hold the given extension.
 * @ext: the extension or suffix to attach, without dot.
 *
 * For processing filenames: truncates @fname at the position
 * reported by gretl_dotpos(), then appends a dot followed by @ext.
 *
 * Returns: the modified string, @fname.
 */

char *switch_ext_in_place (char *fname, const char *ext)
{
    char *tail = fname + gretl_dotpos(fname);

    /* overwrite from the dot position onward */
    tail[0] = '.';
    tail[1] = '\0';
    strcat(fname, ext);

    return fname;
}
1471 
/**
 * switch_ext_new:
 * @src: the original string.
 * @ext: the extension or suffix to attach (without leading '.').
 *
 * For processing filenames: creates a copy of @src in which
 * any existing dot-extension is removed and @ext is appended
 * (with a dot automatically inserted). A dot counts as starting
 * an extension only if no path separator ('/' or '\') follows
 * it, so that a dot within a directory name is left alone.
 *
 * Returns: the newly allocated string, or NULL on allocation
 * failure. The caller owns the result and should free it.
 */

char *switch_ext_new (const char *src, const char *ext)
{
    const char *dot = strrchr(src, '.');
    size_t stem;
    char *ret;

    if (dot != NULL && strpbrk(dot, "/\\") != NULL) {
	/* the last dot belongs to a directory component, as in
	   "/path/v1.2/data": there's no extension to remove */
	dot = NULL;
    }

    /* number of bytes of @src to retain */
    stem = (dot != NULL)? (size_t) (dot - src) : strlen(src);

    /* space for stem + '.' + ext + NUL */
    ret = calloc(stem + strlen(ext) + 2, 1);

    if (ret != NULL) {
	memcpy(ret, src, stem);
	ret[stem] = '.';
	strcpy(ret + stem + 1, ext);
    }

    return ret;
}
1508 
/* Scans @s backwards from index @n, stopping before index 1
   (so the bytes at indices 0 and 1 are never examined).
   Returns 1 if a '#' is met while the count of double-quote
   characters seen so far is even, that is, if the '#' is
   taken not to lie within a string literal; otherwise
   returns 0.
*/

static int ends_in_comment (const char *s, int n)
{
    int in_quotes = 0;
    int pos = n;

    while (pos > 1) {
	char c = s[pos--];

	if (c == '"') {
	    in_quotes = !in_quotes;
	} else if (c == '#' && !in_quotes) {
	    /* an unquoted comment character */
	    return 1;
	}
    }

    return 0;
}
1526 
1527 #define LINE_CONT(c) (c == '\\' || c == ',' || c == '(')
1528 
1529 /**
1530  * top_n_tail:
1531  * @str: the string to process.
1532  * @maxlen: maximum length of string, including NUL termination.
1533  * @err: location to receive error code, or NULL.
1534  *
1535  * Drop leading space and trailing space and newline from string,
1536  * then replace a trailing backslash (if any) with a space.
1537  * If @str does not end with a newline within the limit set by
1538  * @maxlen, and @err is not NULL, then E_TOOLONG is written
1539  * to @err.
1540  *
1541  * Returns: 1 if a trailing backslash, comma, semicolon, or left
1542  * parenthesis was found, otherwise 0.
1543  */
1544 
1545 int top_n_tail (char *str, size_t maxlen, int *err)
1546 {
1547     int i, n, cont = 0;
1548 
1549     if (str == NULL || *str == '\0' || *str == '\n' || *str == '\r') {
1550 	return 0;
1551     }
1552 
1553     n = strlen(str) - 1;
1554 
1555     if (err != NULL && n > maxlen - 2 && str[n] != '\n') {
1556 	*err = E_TOOLONG;
1557     }
1558 
1559     /* chop any trailing space */
1560     for (i=n; i>=0; i--) {
1561 	if (isspace((unsigned char) str[i])) {
1562 	    str[i] = '\0';
1563 	    n--;
1564 	} else {
1565 	    break;
1566 	}
1567     }
1568 
1569     if (*str != '\0') {
1570 	/* Drop any leading spaces, also possible questionmark.  Try
1571 	   to catch non-breaking spaces too -- ugh, Windows!
1572 	   (NBSP is 0xA0 in Windows CP1252)
1573 	*/
1574 	i = 0;
1575 	while (isspace((unsigned char) str[i]) ||
1576 	       str[i] == '?' ||
1577 	       str[i] == (char) 0xC2 ||
1578 	       str[i] == (char) 0xA0) {
1579 	    n--;
1580 	    i++;
1581 	}
1582 	if (i > 0) {
1583 	    shift_string_left(str, i);
1584 	}
1585 
1586 	if (*str == '#' || !strncmp(str, "/*", 2)) {
1587 	    ; /* the line starts a comment: leave well alone */
1588 	} else if (n >= 0 && LINE_CONT(str[n])) {
1589 	    /* register line continuation characters at the end of
1590 	       the line, but only if not preceded by the comment
1591 	       character '#' (unquoted)
1592 	    */
1593 	    cont = !ends_in_comment(str, n - 1);
1594 	    if (cont && str[n] == '\\') {
1595 		/* replace backslash */
1596 		str[n] = ' ';
1597 	    }
1598 	}
1599     }
1600 
1601     return cont;
1602 }
1603 
/**
 * equation_get_lhs_and_rhs:
 * @s: equation in string form.
 * @plh: pointer to receive left-hand side expression.
 * @prh: pointer to receive right-hand side expression.
 *
 * Given a string @s, parse it into a left-hand side and a right-hand
 * side, separated by an equals sign.  Return in @plh and @prh
 * allocated copies of the respective sides, with any leading or trailing
 * white space trimmed. The left-hand side is taken to end at the
 * first space or equals sign following any leading white space;
 * the right-hand side is whatever follows the first equals sign.
 * On error both @plh and @prh are set to NULL (provided they are
 * themselves non-NULL).
 *
 * Returns: 0 on success, 1 on error (NULL argument, no equals sign,
 * an empty side, or allocation failure).
 */

int equation_get_lhs_and_rhs (const char *s, char **plh, char **prh)
{
    const char *lp, *rp, *eq;
    char *lh, *rh;
    size_t llen, rlen;

    if (s == NULL || plh == NULL || prh == NULL) {
	return 1;
    }

    *plh = NULL;
    *prh = NULL;

    eq = strchr(s, '=');
    if (eq == NULL) {
	return 1;
    }

    /* left-hand side: skip leading space (the casts are needed
       since isspace() is undefined for negative char values) */
    lp = s;
    while (isspace((unsigned char) *lp)) {
	lp++;
    }
    llen = strcspn(lp, " =");
    if (llen == 0) {
	return 1;
    }

    /* right-hand side: starts just after the first '=' */
    rp = eq + 1;
    while (isspace((unsigned char) *rp)) {
	rp++;
    }
    rlen = strlen(rp);
    if (rlen == 0) {
	return 1;
    }
    /* trim trailing space; rp[0] is known to be non-space so
       @rlen cannot reach zero here */
    while (isspace((unsigned char) rp[rlen-1])) {
	rlen--;
    }

    lh = gretl_strndup(lp, llen);
    if (lh == NULL) {
	return 1;
    }

    rh = gretl_strndup(rp, rlen);
    if (rh == NULL) {
	free(lh);
	return 1;
    }

    *plh = lh;
    *prh = rh;

    return 0;
}
1680 
/**
 * tailstrip:
 * @str: the string to process, or NULL.
 *
 * Truncates @str so as to remove any trailing white space,
 * including newline and carriage return. A NULL or empty
 * @str is returned untouched.
 *
 * Returns: the modified string, @str.
 */

char *tailstrip (char *str)
{
    size_t n;

    if (str == NULL || *str == '\0') {
	return str;
    }

    n = strlen(str);

    while (n > 0) {
	unsigned char c = (unsigned char) str[n-1];

	if (!isspace(c) && c != '\n' && c != '\r') {
	    /* reached the last non-space byte */
	    break;
	}
	str[--n] = '\0';
    }

    return str;
}
1711 
/**
 * compress_spaces:
 * @s: the string to process, or NULL.
 *
 * Reduces each run of two or more contiguous space characters
 * in @s to a single space, and converts tabs to spaces. Content
 * enclosed in double quotes is left as is; a double quote that
 * is preceded by a backslash does not open or close a quotation.
 *
 * Returns: the compressed string, @s.
 */

char *compress_spaces (char *s)
{
    char *cur, *next;
    int in_quotes = 0;

    if (s == NULL || *s == '\0') {
	return s;
    }

    for (cur = s; *cur != '\0'; cur++) {
	if (*cur == '"' && (cur == s || cur[-1] != '\\')) {
	    in_quotes = !in_quotes;
	}
	if (in_quotes) {
	    continue;
	}
	if (*cur == '\t') {
	    *cur = ' '; /* trash tabs */
	}
	if (*cur != ' ') {
	    continue;
	}
	next = cur + 1;
	if (*next == '\0') {
	    /* a single space at the end: nothing more to do */
	    break;
	}
	/* find the end of the current run of spaces */
	next += strspn(next, " ");
	if (next - cur > 1) {
	    /* close the gap, bringing the NUL terminator along */
	    memmove(cur + 1, next, strlen(next) + 1);
	}
    }

    return s;
}
1756 
/**
 * space_to_score:
 * @s: the NUL-terminated string to process, modified in place.
 *
 * Replaces every space character in @s with an underscore.
 *
 * Returns: the (possibly) modified string, @s.
 */

char *space_to_score (char *s)
{
    size_t i;

    for (i = 0; s[i] != '\0'; i++) {
	if (s[i] == ' ') {
	    s[i] = '_';
	}
    }

    return s;
}
1777 
/**
 * strings_array_new:
 * @nstrs: number of strings in array.
 *
 * Allocates an array of @nstrs string pointers, each of which
 * is set to NULL. No storage is allocated for the strings
 * themselves.
 *
 * Returns: the allocated array, or NULL on failure or if
 * @nstrs is not positive.
 */

char **strings_array_new (int nstrs)
{
    char **S = NULL;

    if (nstrs > 0) {
	S = malloc(nstrs * sizeof *S);
    }

    if (S != NULL) {
	char **p = S + nstrs;

	/* walk back from the end, nulling each slot */
	while (p > S) {
	    *--p = NULL;
	}
    }

    return S;
}
1806 
1807 /**
1808  * strings_array_add:
1809  * @pS: pointer to strings array.
1810  * @n: location of present number of strings in array.
1811  * @p: string to add to array.
1812  *
1813  * Allocates storage for an extra member of @S and adds a
1814  * copy of string @p in the last position.  On success,
1815  * the content of @n is incremented by 1.
1816  *
1817  * Returns: 0 on success, %E_ALLOC on failure.
1818  */
1819 
1820 int strings_array_add (char ***pS, int *n, const char *p)
1821 {
1822     char **Tmp;
1823     int m = *n;
1824 
1825     Tmp = realloc(*pS, (m + 1) * sizeof *Tmp);
1826     if (Tmp == NULL) {
1827 	return E_ALLOC;
1828     }
1829 
1830     *pS = Tmp;
1831 
1832     if (p != NULL) {
1833 	Tmp[m] = gretl_strdup(p);
1834 	if (Tmp[m] == NULL) {
1835 	    return E_ALLOC;
1836 	}
1837     } else {
1838 	Tmp[m] = NULL;
1839     }
1840 
1841     *n += 1;
1842 
1843     return 0;
1844 }
1845 
1846 /**
1847  * strings_array_prepend_uniq:
1848  * @pS: pointer to strings array.
1849  * @n: location of present number of strings in array.
1850  * @p: string to prepend to array.
1851  *
1852  * If @p is already present in the array at location @pS,
1853  * moves it into first position if it is not already there.
1854  * Otherwise allocates storage for an extra member of the
1855  * array and pushes @p into first position, in which case
1856  * the content of @n is incremented by 1.
1857  *
1858  * Returns: 0 on success, %E_ALLOC on failure.
1859  */
1860 
1861 int strings_array_prepend_uniq (char ***pS, int *n, const char *p)
1862 {
1863     char **S = *pS;
1864     char *s0;
1865     int m = *n;
1866     int i, pos = -1;
1867 
1868     if (p == NULL) {
1869 	return E_DATA;
1870     }
1871 
1872     for (i=0; i<m; i++) {
1873 	if (!strcmp(S[i], p)) {
1874 	    pos = i;
1875 	    break;
1876 	}
1877     }
1878 
1879     if (pos == 0) {
1880 	/* already present in 1st position */
1881 	return 0;
1882     } else if (pos > 0) {
1883 	/* already present, not first */
1884 	s0 = S[pos];
1885 	for (i=pos; i>0; i--) {
1886 	    S[i] = S[i-1];
1887 	}
1888 	S[0] = s0;
1889     } else if (pos < 0) {
1890 	/* not present */
1891 	S = realloc(*pS, (m + 1) * sizeof *S);
1892 	if (S == NULL) {
1893 	    return E_ALLOC;
1894 	}
1895 	s0 = gretl_strdup(p);
1896 	if (s0 == NULL) {
1897 	    return E_ALLOC;
1898 	}
1899 	*pS = S;
1900 	*n = ++m;
1901 	/* shuffle up */
1902 	for (i=m; i>0; i--) {
1903 	    S[i] = S[i-1];
1904 	}
1905 	/* and insert @p */
1906 	S[0] = s0;
1907     }
1908 
1909     return 0;
1910 }
1911 
1912 /**
1913  * strings_array_donate:
1914  * @pS: pointer to strings array.
1915  * @n: location of present number of strings in array.
1916  * @p: string to append to array.
1917  *
1918  * Allocates storage for an extra member of @S and adds
1919  * string @p in the last position. Unlike strings_array_add(),
1920  * the array takes ownnership of @p rather than copying it.
1921  * On success, the content of @n is incremented by 1.
1922  *
1923  * Returns: 0 on success, %E_ALLOC on failure.
1924  */
1925 
1926 int strings_array_donate (char ***pS, int *n, char *p)
1927 {
1928     char **Tmp;
1929     int m = *n;
1930 
1931     Tmp = realloc(*pS, (m + 1) * sizeof *Tmp);
1932     if (Tmp == NULL) {
1933 	return E_ALLOC;
1934     }
1935 
1936     *pS = Tmp;
1937 
1938     if (p != NULL) {
1939 	Tmp[m] = p;
1940 	if (Tmp[m] == NULL) {
1941 	    return E_ALLOC;
1942 	}
1943     } else {
1944 	Tmp[m] = NULL;
1945     }
1946 
1947     *n += 1;
1948 
1949     return 0;
1950 }
1951 
1952 /**
1953  * strings_array_add_uniq:
1954  * @pS: pointer to strings array.
1955  * @n: location of present number of strings in array.
1956  * @p: string to test for addition to array.
1957  * @pos: location to receive the position of @p in the
1958  * array (whether already matched or newly added), or NULL.
1959  *
1960  * If the array does not already include a copy of @p,
1961  * allocates storage for an extra member of @pS and adds a
1962  * copy of string @p in the last position. On successful
1963  * addition the content of @n is incremented by 1.
1964  *
1965  * Returns: 0 on success, %E_ALLOC on failure.
1966  */
1967 
1968 int strings_array_add_uniq (char ***pS, int *n, const char *p,
1969 			    int *pos)
1970 {
1971     char **Tmp, **S = *pS;
1972     int m = *n;
1973     int i;
1974 
1975     for (i=0; i<m; i++) {
1976 	if (S[i] != NULL && strcmp(S[i], p) == 0) {
1977 	    if (pos != NULL) {
1978 		*pos = i;
1979 	    }
1980 	    return 0; /* no-op */
1981 	}
1982     }
1983 
1984     Tmp = realloc(S, (m + 1) * sizeof *Tmp);
1985     if (Tmp == NULL) {
1986 	return E_ALLOC;
1987     }
1988 
1989     if (p != NULL) {
1990 	Tmp[m] = gretl_strdup(p);
1991 	if (Tmp[m] == NULL) {
1992 	    return E_ALLOC;
1993 	}
1994     } else {
1995 	Tmp[m] = NULL;
1996     }
1997 
1998     if (pos != NULL) {
1999 	*pos = m;
2000     }
2001 
2002     *pS = Tmp;
2003     *n += 1;
2004 
2005     return 0;
2006 }
2007 
/**
 * strings_array_new_with_length:
 * @nstrs: number of strings in array.
 * @len: number of bytes per string.
 *
 * Allocates storage for @nstrs strings, each of them
 * @len bytes long.  The first byte of each string is
 * initialized to 0.
 *
 * Returns: the allocated array, or NULL on failure or if
 * either @nstrs or @len is not positive.
 */

char **strings_array_new_with_length (int nstrs, int len)
{
    char **S;
    int i, j;

    /* @len must be at least 1 since we write a NUL byte to
       each string below */
    if (nstrs <= 0 || len <= 0) {
	return NULL;
    }

    S = malloc(nstrs * sizeof *S);
    if (S == NULL) {
	return NULL;
    }

    for (i=0; i<nstrs; i++) {
	S[i] = malloc(len);
	if (S[i] == NULL) {
	    /* undo the allocations made so far */
	    for (j=0; j<i; j++) {
		free(S[j]);
	    }
	    free(S);
	    return NULL;
	}
	S[i][0] = '\0';
    }

    return S;
}
2046 
/**
 * strings_array_realloc_with_length:
 * @pS: existing array to reallocate.
 * @oldn: original number of strings in the array.
 * @newn: new number of strings in array.
 * @len: number of bytes per string.
 *
 * Resizes the array at @pS so that it holds @newn strings.
 * When the array grows, each additional string is allocated
 * with @len bytes and its first byte is set to 0; when it
 * shrinks, the surplus strings are freed. If @newn is not
 * positive the whole array is freed. On allocation failure
 * the array is destroyed and the content of @pS is set to NULL.
 *
 * Returns: the new array, or NULL on failure (or if @pS is
 * NULL, or @newn is not positive).
 */

char **strings_array_realloc_with_length (char ***pS,
					  int oldn,
					  int newn,
					  int len)
{
    char **S;
    int i;

    if (pS == NULL) {
	/* nothing to work on */
	return NULL;
    }

    if (newn == oldn) {
	/* size unchanged: hand back the array as is */
	return *pS;
    }

    if (newn <= 0) {
	strings_array_free(*pS, oldn);
	*pS = NULL;
	return NULL;
    }

    /* shrinking: release strings beyond the new limit */
    i = newn;
    while (i < oldn) {
	free((*pS)[i]);
	(*pS)[i] = NULL;
	i++;
    }

    S = realloc(*pS, newn * sizeof *S);
    if (S == NULL) {
	strings_array_free(*pS, oldn);
	*pS = NULL;
	return NULL;
    }

    *pS = S;

    /* expanding: allocate the additional strings */
    for (i=oldn; i<newn; i++) {
	S[i] = malloc(len);
	if (S[i] == NULL) {
	    /* frees members 0 to i-1, then the array itself */
	    strings_array_free(S, i);
	    *pS = NULL;
	    return NULL;
	}
	S[i][0] = '\0';
    }

    return S;
}
2118 
/**
 * strings_array_dup:
 * @strs: array of strings to be copied.
 * @n: number of strings in array.
 *
 * Makes a deep copy of @strs: each non-NULL member is
 * duplicated, and NULL members are carried over as NULL.
 *
 * Returns: an allocated copy of @strs, or NULL on failure
 * (or if @strs is NULL or @n is not positive).
 */

char **strings_array_dup (char **strs, int n)
{
    char **S;
    int i;

    if (strs == NULL || n <= 0) {
	return NULL;
    }

    S = malloc(n * sizeof *S);
    if (S == NULL) {
	return NULL;
    }

    for (i=0; i<n; i++) {
	S[i] = (strs[i] == NULL)? NULL : gretl_strdup(strs[i]);
	if (strs[i] != NULL && S[i] == NULL) {
	    /* copy failed: unwind what we have so far */
	    while (i > 0) {
		free(S[--i]);
	    }
	    free(S);
	    return NULL;
	}
    }

    return S;
}
2156 
/* qsort() callback: @a and @b each point to an element of an
   array of strings; the strings are ordered as per strcmp() */

static int compare_strings (const void *a, const void *b)
{
    const char *const *pa = a;
    const char *const *pb = b;

    return strcmp(*pa, *pb);
}
2164 
2165 /**
2166  * strings_array_sort:
2167  * @pS: location of array of strings.
2168  * @n: location of the number of strings in the array.
2169  * @opt: may contain %OPT_U to trim the sorted array
2170  * so that it contains only unique entries.
2171  *
2172  * Sorts an array of strings in ascending lexicographical
2173  * order. If %OPT_U is given, @n holds the number of unique
2174  * strings on exit. It is assumed that storage for the
2175  * strings array was obtained via strings_array_new() or
2176  * a similar libgretl function.
2177  *
2178  * Returns: 0 on success, non-zero on error.
2179  */
2180 
2181 int strings_array_sort (char ***pS, int *n, gretlopt opt)
2182 {
2183     char **S;
2184     int ns;
2185 
2186     if (pS == NULL || n == NULL) {
2187 	return E_DATA;
2188     }
2189 
2190     S = *pS;
2191     ns = *n;
2192 
2193     qsort(S, ns, sizeof *S, compare_strings);
2194 
2195     if (opt & OPT_U) {
2196 	int i, j, m = ns;
2197 
2198 	for (i=0; i<m-1; i++) {
2199 	    if (!strcmp(S[i], S[i+1])) {
2200 		free(S[i+1]);
2201 		for (j=i+1; j<m-1; j++) {
2202 		    S[j] = S[j+1];
2203 		}
2204 		S[m-1] = NULL;
2205 		i--;
2206 		m--;
2207 	    }
2208 	}
2209 	if (m < ns) {
2210 	    char **tmp = realloc(S, m * sizeof *S);
2211 
2212 	    if (tmp != NULL) {
2213 		*pS = tmp;
2214 	    }
2215 	    *n = m;
2216 	}
2217     }
2218 
2219     return 0;
2220 }
2221 
/**
 * strings_array_cmp:
 * @strs1: first array of strings.
 * @strs2: second array of strings.
 * @n: number of strings to examine.
 *
 * Compares two arrays of strings member by member, using
 * strcmp(), over the first @n positions; each array must
 * contain at least @n elements. The comparison stops at the
 * first pair of strings that differ.
 *
 * Returns: 0 if the arrays compare equal, otherwise the
 * non-zero strcmp() result for the first differing pair.
 */

int strings_array_cmp (char **strs1, char **strs2, int n)
{
    int i;

    for (i=0; i<n; i++) {
	int d = strcmp(strs1[i], strs2[i]);

	if (d != 0) {
	    return d;
	}
    }

    return 0;
}
2248 
/**
 * strings_array_position:
 * @strs: array of strings.
 * @n: number of elements in @strs.
 * @s: string to test, or NULL.
 *
 * Searches @strs for the first member that compares equal
 * to @s; NULL members of @strs are skipped.
 *
 * Returns: the 0-based position of the first match, or -1 if
 * no match is found or @s is NULL.
 */

int strings_array_position (char **strs, int n, const char *s)
{
    int i;

    if (s == NULL) {
	return -1;
    }

    for (i=0; i<n; i++) {
	if (strs[i] == NULL) {
	    continue;
	}
	if (strcmp(strs[i], s) == 0) {
	    return i;
	}
    }

    return -1;
}
2273 
/**
 * strings_array_diff:
 * @strs1: first array of strings.
 * @n1: number of strings in @strs1.
 * @strs2: second array of strings.
 * @n2: number of strings in @strs2.
 * @extra: location to receive array of strings present
 * in @strs2 but not in @strs1 (or NULL).
 * @n_extra: location to receive number of strings in
 * the @extra array (or NULL).
 *
 * Fills out @extra with copies of any strings present in
 * @strs2 but not in @strs1. If either @extra or @n_extra is
 * NULL nothing is done. On failure @extra and @n_extra are
 * not written to.
 *
 * Returns: 0 on success, non-zero otherwise.
 */

int strings_array_diff (char **strs1, int n1,
			char **strs2, int n2,
			char ***extra, int *n_extra)
{
    char **S = NULL;
    int i, j, found;
    int n = 0;
    int err = 0;

    if (extra == NULL || n_extra == NULL) {
	/* no place to put the result */
	return 0;
    }

    for (j=0; j<n2 && !err; j++) {
	found = 0;
	for (i=0; i<n1 && !found; i++) {
	    if (!strcmp(strs2[j], strs1[i])) {
		found = 1;
	    }
	}
	if (!found) {
	    err = strings_array_add(&S, &n, strs2[j]);
	}
    }

    if (err) {
	/* don't leak the partially built array */
	strings_array_free(S, n);
    } else {
	*extra = S;
	*n_extra = n;
    }

    return err;
}
2320 
/**
 * strings_array_reverse:
 * @strs: array of allocated strings.
 * @nstrs: number of strings in array.
 *
 * Returns: a newly allocated array of @nstrs strings holding
 * copies of the strings in @strs, in reverse order -- or
 * NULL on failure.
 */

char **strings_array_reverse (char **strs, int nstrs)
{
    char **S = strings_array_new(nstrs);
    int i;

    if (S == NULL) {
	return NULL;
    }

    for (i=0; i<nstrs; i++) {
	S[i] = gretl_strdup(strs[nstrs-i-1]);
	if (S[i] == NULL) {
	    /* members not yet filled are NULL, so it's safe to
	       free the full array; and we must not hand back a
	       pointer to freed storage */
	    strings_array_free(S, nstrs);
	    return NULL;
	}
    }

    return S;
}
2350 
/**
 * strings_array_free:
 * @strs: array of allocated strings, or NULL.
 * @nstrs: number of strings in array.
 *
 * Frees each of the first @nstrs members of @strs, then frees
 * @strs itself. Does nothing if @strs is NULL.
 */

void strings_array_free (char **strs, int nstrs)
{
    int i = 0;

    if (strs == NULL) {
	return;
    }

    while (i < nstrs) {
	free(strs[i++]);
    }

    free(strs);
}
2371 
2372 /**
2373  * get_obs_string:
2374  * @obs: char array big enough to hold the observation (#OBSLEN).
2375  * @t: zero-based observation number.
2376  * @dset: pointer to dataset information.
2377  *
2378  * Returns: the observation string corresponding to @t.
2379  */
2380 
2381 char *get_obs_string (char *obs, int t, const DATASET *dset)
2382 {
2383     if (dataset_has_markers(dset)) {
2384 	strcpy(obs, dset->S[t]);
2385     } else {
2386 	ntolabel(obs, t, dset);
2387     }
2388 
2389     return obs;
2390 }
2391 
2392 /**
2393  * obs_str_to_double:
2394  * @obs: string representation of observation number.
2395  *
2396  * Returns: the floating-point counterpart of @obs,
2397  * or #NADBL on invalid input.
2398  */
2399 
2400 double obs_str_to_double (const char *obs)
2401 {
2402     char *p, *test, tmp[OBSLEN];
2403     double ret;
2404 
2405     strcpy(tmp, obs);
2406     p = tmp;
2407 
2408     while (*p) {
2409 	if (*p == ':' || *p == ',') {
2410 	    *p = '.';
2411 	}
2412 	p++;
2413     }
2414 
2415     errno = 0;
2416 
2417     gretl_push_c_numeric_locale();
2418     ret = strtod(tmp, &test);
2419     gretl_pop_c_numeric_locale();
2420 
2421     if (*test != '\0' || errno == ERANGE) {
2422 	ret = NADBL;
2423     }
2424 
2425     return ret;
2426 }
2427 
/**
 * colonize_obs:
 * @obs: string representation of observation number.
 *
 * Replaces each period or comma in @obs with a colon.
 *
 * Returns: the (possibly) modified obs string.
 */

char *colonize_obs (char *obs)
{
    char *p = obs;

    /* hop from one '.' or ',' to the next, replacing each */
    while ((p = strpbrk(p, ".,")) != NULL) {
	*p++ = ':';
    }

    return obs;
}
2450 
/**
 * modify_date_for_csv:
 * @s: observation string (date).
 * @pd: data frequency.
 *
 * Modifies the observation string @s to produce a form
 * suitable for a CSV file, by passing it to gretl_charsub()
 * with ':' as the character to be replaced: the substitute is
 * 'Q' if @pd equals 4, and 'M' otherwise. This applies only to
 * time series data. The string @s should be obtained by calling
 * ntolabel().
 */

void modify_date_for_csv (char *s, int pd)
{
    const char sub = (pd == 4)? 'Q' : 'M';

    gretl_charsub(s, ':', sub);
}
2470 
/**
 * print_time:
 * @s: string into which to print: must be at least 48 bytes.
 *
 * Returns: @s, which will contain a string representation of the
 * current local date and time, in the format YYYY-mm-dd H:M.
 * If the time cannot be obtained or formatted, @s is set to
 * the empty string.
 */

char *print_time (char *s)
{
    time_t now = time(NULL);
    struct tm *local = localtime(&now);

    /* localtime() can return NULL, and when strftime() returns
       0 the content of the buffer is indeterminate: in either
       case ensure that the caller gets a valid (empty) string
    */
    if (local == NULL ||
	strftime(s, 47, "%Y-%m-%d %H:%M", local) == 0) {
	*s = '\0';
    }

    return s;
}
2489 
/**
 * gretl_xml_validate:
 * @s: string to be tested.
 *
 * Checks @s for the presence of any of the characters
 * ampersand, less-than, greater-than and double quote.
 *
 * Returns: 1 if @s is acceptable for insertion into an XML file
 * as is, 0 if it contains special characters that need to be
 * escaped.  See also gretl_xml_encode().
 */

int gretl_xml_validate (const char *s)
{
    /* valid if none of the special characters can be found */
    return strpbrk(s, "&<>\"") == NULL;
}
2510 
/**
 * gretl_xml_encode:
 * @str: NUL-terminated source string.
 *
 * Builds a copy of @str in which each ampersand, less-than,
 * greater-than and double-quote character is replaced by the
 * corresponding XML character entity. If allocation fails an
 * error message is registered via gretl_errmsg_set().
 *
 * Returns: an allocated re-write of @str, with characters that are
 * special in XML encoded as character entities, or NULL on
 * allocation failure.  See also gretl_xml_validate().
 */

char *gretl_xml_encode (const char *str)
{
    const char *s;
    char *targ, *p;
    int len = strlen(str) + 1;

    /* first pass: add the extra bytes required by each entity */
    for (s = str; *s; s++) {
	switch (*s) {
	case '&':
	    len += 4;
	    break;
	case '<':
	case '>':
	    len += 3;
	    break;
	case '"':
	    len += 5;
	    break;
	default:
	    break;
	}
    }

    targ = malloc(len);
    if (targ == NULL) {
	gretl_errmsg_set(_("out of memory in XML encoding"));
	return NULL;
    }

    /* second pass: write out the encoded version */
    p = targ;
    for (s = str; *s; s++) {
	const char *ent = NULL;

	switch (*s) {
	case '&':
	    ent = "&amp;";
	    break;
	case '<':
	    ent = "&lt;";
	    break;
	case '>':
	    ent = "&gt;";
	    break;
	case '"':
	    ent = "&quot;";
	    break;
	default:
	    break;
	}

	if (ent != NULL) {
	    strcpy(p, ent);
	    p += strlen(ent);
	} else {
	    *p++ = *s;
	}
    }

    /* @p is now at the last of the @len bytes */
    *p = '\0';

#ifdef XML_DEBUG
    fprintf(stderr, "done gretl_xml_encode: targ='%s'\n", targ);
#endif

    return targ;
}
2570 
/**
 * gretl_xml_encode_to_buf:
 * @targ: target buffer.
 * @src: NUL-terminated source string.
 * @n: size of @targ in bytes.
 *
 * Writes into @targ a version of @src in which characters that are
 * special in XML (ampersand, less-than, greater-than, double
 * quote) are encoded as character entities.  See also
 * gretl_xml_encode() for the case where the encoding of @src is
 * of unknown size at compile time.
 *
 * Returns: 0 on success or 1 on error.  An error occurs if (a) the
 * encoded version of @src is longer than @n bytes (allowing for NUL
 * termination), in which case @targ is set to the empty string, or
 * (b) @src does not validate as UTF-8, in which case @targ is not
 * touched.  On error the conversion is not done.
 */

int gretl_xml_encode_to_buf (char *targ, const char *src, int n)
{
    const char *s;
    char *out = targ;
    int need = strlen(src) + 1;

    if (!g_utf8_validate(src, -1, NULL)) {
	fprintf(stderr, "gretl_xml_encode_to_buf: source not UTF-8\n");
	return 1;
    }

    /* work out the space needed for the encoded string */
    for (s = src; *s; s++) {
	switch (*s) {
	case '&':
	    need += 4;
	    break;
	case '<':
	case '>':
	    need += 3;
	    break;
	case '"':
	    need += 5;
	    break;
	default:
	    break;
	}
    }

    *targ = '\0';

    if (need > n) {
	fprintf(stderr, "gretl_xml_encode_to_buf: buffer too small\n");
	return 1;
    }

    for (s = src; *s; s++) {
	const char *ent = NULL;

	switch (*s) {
	case '&':
	    ent = "&amp;";
	    break;
	case '<':
	    ent = "&lt;";
	    break;
	case '>':
	    ent = "&gt;";
	    break;
	case '"':
	    ent = "&quot;";
	    break;
	default:
	    break;
	}

	if (ent != NULL) {
	    strcpy(out, ent);
	    out += strlen(ent);
	} else {
	    *out++ = *s;
	}
    }

    *out = '\0';

    return 0;
}
2638 
/* Converts the two hexadecimal digits at s[0] and s[1] (in
   either case) to the byte value they represent. The caller
   must ensure that both characters are in fact hex digits. */

static char x2c (char *s)
{
    register char digit;

    /* masking with 0xdf folds lower-case letters to upper case */
    digit = (s[0] >= 'A' ? ((s[0] & 0xdf) - 'A') + 10 : (s[0] - '0'));
    digit *= 16;
    digit += (s[1] >= 'A' ? ((s[1] & 0xdf) - 'A') + 10 : (s[1] - '0'));
    return digit;
}

/**
 * unescape_url:
 * @url: string representing a URL, or NULL.
 *
 * Decodes, in place, any percent-escapes in @url: each
 * occurrence of '%' followed by two hexadecimal digits is
 * replaced by the single byte with that value. A '%' that
 * is not followed by two hexadecimal digits (including one
 * that falls at or near the end of the string) is left as is.
 * The result is never longer than the input.
 */

void unescape_url (char *url)
{
    int x, y;

    if (url == NULL) {
	return;
    }

    /* @y is the read position, @x the write position */
    for (x=0, y=0; url[y]; ++x, ++y) {
	/* the short-circuit evaluation ensures that we never
	   look beyond the NUL terminator */
	if (url[y] == '%' &&
	    isxdigit((unsigned char) url[y+1]) &&
	    isxdigit((unsigned char) url[y+2])) {
	    url[x] = x2c(&url[y+1]);
	    y += 2;
	} else {
	    url[x] = url[y];
	}
    }

    url[x] = '\0';
}
2667 
2668 /**
2669  * make_varname_unique:
2670  * @vname: tentative name for variable.
2671  * @v: the ID number for the new variable.
2672  * @dset: dataset information.
2673  *
2674  * Given a tentative name for a new variable, check that it
2675  * is not a duplicate of an existing varname.  If it is,
2676  * modify the new name so that it becomes unique. The ID
2677  * number @v is required so that, if the variable has already
2678  * been added to the dataset, its name does not appear to
2679  * conflict with itself!  If the name to be tested is not
2680  * associated with an existing variable, pass 0 for @v.
2681  *
2682  * Returns: the (possibly modified) variable name.
2683  */
2684 
2685 char *make_varname_unique (char *vname, int v, DATASET *dset)
2686 {
2687     const char *sfx = "abcdefghijklmnopqrstuvwxzy"
2688 	"ABCDEFGHIJKLMNOPQRSTUVWXZY";
2689     size_t n, nmax = VNAMELEN - 8;
2690     char tmp[5] = {0};
2691     int i, j, k, vi;
2692     int unique = 1;
2693 
2694     /* first off: see if the series name is already unique! */
2695     vi = current_series_index(dset, vname);
2696     if ((v == 0 && vi < 0) || vi == v) {
2697 	return vname;
2698     }
2699 
2700     /* strategy: cut @vname down to a length that permits
2701        addition of a suffix (if necessary), then add a
2702        suffix composed of underscore and three (ASCII)
2703        letters. This allows for 52^3 = 140608 unique
2704        suffixes.
2705     */
2706 
2707     n = strlen(vname);
2708     if (n > nmax) {
2709 	n = nmax;
2710     }
2711 
2712     tmp[0] = '_';
2713     unique = 0;
2714 
2715     for (i=0; i<52 && !unique; i++) {
2716 	tmp[1] = sfx[i];
2717 	for (j=0; j<52 && !unique; j++) {
2718 	    tmp[2] = sfx[j];
2719 	    for (k=0; k<52 && !unique; k++) {
2720 		tmp[3] = sfx[k];
2721 		vname[n] = '\0';
2722 		strcat(vname, tmp);
2723 		unique = 1;
2724 		for (vi = 1; vi < dset->v; vi++) {
2725 		    if (vi != v && !strcmp(vname, dset->varname[vi])) {
2726 			unique = 0;
2727 			break;
2728 		    }
2729 		}
2730 	    }
2731 	}
2732     }
2733 
2734     if (!unique) {
2735 	fprintf(stderr, "make_varname_unique: unresolved conflict!\n");
2736     }
2737 
2738     return vname;
2739 }
2740 
2741 int fix_varname_duplicates (DATASET *dset)
2742 {
2743     int msg_done, dups = 0;
2744     int i, j;
2745 
2746     for (i=1; i<dset->v; i++) {
2747 	msg_done = 0;
2748 	for (j=i+1; j<dset->v; j++) {
2749 	    if (!strcmp(dset->varname[i], dset->varname[j])) {
2750 		if (!msg_done) {
2751 		    fprintf(stderr, "'%s' duplicated variable name\n",
2752 			    dset->varname[i]);
2753 		    msg_done = 1;
2754 		}
2755 		dups = 1;
2756 		make_varname_unique(dset->varname[j], j, dset);
2757 	    }
2758 	}
2759     }
2760 
2761     return dups;
2762 }
2763 
2764 char *append_dir (char *fname, const char *dir)
2765 {
2766     size_t len;
2767 
2768     if (dir == NULL) {
2769 	return fname;
2770     }
2771 
2772     len = strlen(fname);
2773 
2774     if (fname[len - 1] == '/' || fname[len - 1] == '\\') {
2775         strcat(fname, dir);
2776     } else {
2777         strcat(fname, SLASHSTR);
2778         strcat(fname, dir);
2779     }
2780 
2781     strcat(fname, SLASHSTR);
2782 
2783     return fname;
2784 }
2785 
2786 /**
2787  * path_last_element:
2788  * @path: path to work on.
2789  *
2790  * Returns: a pointer to the last element of @path, that is,
2791  * the element following the last path separator character, if any.
2792  * If @path does not contain a separator, @path itself is returned.
2793  * Note that the return value may be the empty string, if @path
2794  * ends with a separator.
2795  */
2796 
const char *path_last_element (const char *path)
{
    const char *sep = strrslash(path);

    /* if strrslash() finds nothing the whole of @path is the last
       element; otherwise it begins just after the byte found */
    return (sep == NULL) ? path : sep + 1;
}
2809 
2810 /**
2811  * trim_slash:
2812  * @s: string to work on.
2813  *
2814  * If @s ends with #SLASH, remove this character.
2815  *
2816  * Returns: the (possibly) modified string.
2817  */
2818 
2819 char *trim_slash (char *s)
2820 {
2821     int n = strlen(s);
2822 
2823     if (n > 0 && (s[n-1] == SLASH)) {
2824 	s[n-1] = '\0';
2825     }
2826 
2827     return s;
2828 }
2829 
2830 /**
2831  * gretl_string_ends_with:
2832  * @s: string to examine.
2833  * @test: string to test for.
2834  *
2835  * Returns: 1 if @s ends with @test, else 0.
2836  */
2837 
int gretl_string_ends_with (const char *s, const char *test)
{
    int slen = strlen(s);
    int tlen = strlen(test);

    if (tlen > slen) {
	/* @test is longer than @s, so no match is possible */
	return 0;
    }

    /* compare @test with the final tlen bytes of @s */
    return strcmp(s + slen - tlen, test) == 0;
}
2852 
2853 /**
2854  * get_column_widths:
2855  * @strs: array of @n strings.
2856  * @widths: array of @n default column widths.
2857  * @n: number of columns.
2858  *
 * If need be, increases the column widths in @widths to
 * accommodate the current translations of @strs.
2861  */
2862 
void get_column_widths (const char **strs, int *widths, int n)
{
    int col;

    for (col = 0; col < n; col++) {
	/* measure the translated heading in UTF-8 characters */
	int w = g_utf8_strlen(_(strs[col]), -1);

	if (widths[col] < w) {
	    widths[col] = w;
	}
    }
}
2874 
2875 /**
2876  * gretl_utf8_strncat:
2877  * @dest: destination string.
2878  * @src: source string.
2879  * @n: maximum number of bytes to append.
2880  *
2881  * Works just like strncat(), except that it ensures that we
2882  * don't end up with an incomplete UTF-8 character preceding
2883  * the terminating NUL byte.
2884  *
2885  * Returns: the destination string.
2886  */
2887 
char *gretl_utf8_strncat (char *dest, const char *src, size_t n)
{
    const char *cur = src;
    size_t keep = 0;

    /* Walk @src one UTF-8 character at a time, recording in
       @keep the byte offset of the last character boundary that
       lies within the first @n bytes.
    */
    while (cur != NULL && *cur != '\0') {
	size_t used;

	cur = g_utf8_next_char(cur);
	if (cur == NULL) {
	    /* the loop condition then ends the walk */
	    continue;
	}
	used = cur - src;
	if (used > n) {
	    break;
	}
	keep = used;
    }

    return strncat(dest, src, keep);
}
2906 
2907 /**
2908  * gretl_utf8_strncat_trim:
2909  * @dest: destination string.
2910  * @src: source string.
2911  * @n: maximum number of bytes to append.
2912  *
 * The same as gretl_utf8_strncat(), except that any leading white
 * space in @src is skipped before appending, and any trailing white
 * space is trimmed from @dest after appending.
2915  *
2916  * Returns: the destination string.
2917  */
2918 
char *gretl_utf8_strncat_trim (char *dest, const char *src, size_t n)
{
    const char *p;
    size_t b, b0 = 0;
    size_t len;

    /* skip any leading white space in @src */
    src += strspn(src, " \t\r\n");
    p = src;

    /* find the largest byte count, not exceeding @n, that ends
       on a UTF-8 character boundary */
    while (p && *p) {
	p = g_utf8_next_char(p);
	if (p) {
	    b = p - src;
	    if (b > n) {
		break;
	    }
	    b0 = b;
	}
    }

    strncat(dest, src, b0);

    /* Strip trailing white space from the result. The cast to
       unsigned char is required: the bytes of multi-byte UTF-8
       characters are negative where plain char is signed, and
       passing a negative value to isspace() is undefined
       behavior. Note that isspace() already covers '\r'.
    */
    len = strlen(dest);
    while (len > 0 && isspace((unsigned char) dest[len-1])) {
	dest[--len] = '\0';
    }

    return dest;
}
2953 
2954 /**
2955  * gretl_utf8_truncate:
2956  * @s: string to process.
2957  * @nmax: maximum number of characters to retain.
2958  *
2959  * Truncates @s to a maximum length of @nmax UTF-8 characters,
2960  * ensuring that we don't end up with an incomplete UTF-8
2961  * character preceding the terminating NUL byte.
2962  *
2963  * Returns: the (possibly truncated) string.
2964  */
2965 
char *gretl_utf8_truncate (char *s, size_t nmax)
{
    char *p = s;
    size_t n = 0;

    if (s != NULL && nmax == 0) {
	/* Retain no characters at all. The counting loop below
	   can cut only after at least one character has been
	   passed, so a limit of zero must be handled up front.
	*/
	*s = '\0';
	return s;
    }

    while (p && *p) {
	p = g_utf8_next_char(p);
	/* n counts the characters passed so far; cut here once
	   the limit is reached, provided something follows */
	if (p && *p && ++n == nmax) {
	    *p = '\0';
	    break;
	}
    }

    return s;
}
2983 
2984 /**
2985  * gretl_utf8_truncate_b:
2986  * @s: string to process.
2987  * @bmax: maximum number of bytes to retain.
2988  *
2989  * Truncates @s to a maximum length of @bmax bytes,
2990  * ensuring that we don't end up with an incomplete UTF-8
2991  * character preceding the terminating NUL byte.
2992  *
2993  * Returns: the (possibly truncated) string.
2994  */
2995 
char *gretl_utf8_truncate_b (char *s, size_t bmax)
{
    char *cur = s;

    while (cur != NULL && *cur != '\0') {
	size_t used;

	cur = g_utf8_next_char(cur);
	used = cur - s;
	if (used < bmax) {
	    /* still within the byte limit: keep going */
	    continue;
	}
	if (used > bmax) {
	    /* the character just passed runs over the limit, so
	       step back and cut at its start instead */
	    cur = g_utf8_prev_char(cur);
	}
	*cur = '\0';
	break;
    }

    return s;
}
3016 
3017 /**
3018  * gretl_utf8_replace_char:
3019  * @targ: the target for replacement.
3020  * @src: the UTF-8 character to insert (NUL terminated).
3021  * @pos: the position, in UTF-8 characters, at which to replace.
3022  *
3023  * Notes: @pos must be prechecked as within bounds, and
3024  * @src must be prechecked for containing a single character.
3025  *
3026  * Returns: newly allocated resulting string.
3027  */
3028 
3029 char *gretl_utf8_replace_char (char *targ, char *src, int pos)
3030 {
3031     char *s = g_utf8_offset_to_pointer(targ, pos);
3032     char *p = g_utf8_find_next_char(s, NULL);
3033     char *ret;
3034     int len;
3035 
3036     len = strlen(targ) + strlen(src) + 1 - (p - s);
3037     ret = calloc(len, 1);
3038     strncat(ret, targ, s - targ);
3039     strcat(ret, src);
3040     strcat(ret, p);
3041 
3042     return ret;
3043 }
3044 
3045 /**
3046  * gretl_scan_varname:
3047  * @src: source string.
3048  * @targ: target string.
3049  *
3050  * Performs sscanf() on @src, using a conversion specifier
3051  * which allows for writing up to VNAMELEN-1 bytes into
 * @targ (stopping at white space); @targ must therefore be at
 * least VNAMELEN bytes long. No checking is done for the
3054  * validity of the scanned string as a gretl identifier.
3055  *
3056  * Returns: the return value from sscanf().
3057  */
3058 
3059 int gretl_scan_varname (const char *src, char *targ)
3060 {
3061     char fmt[8];
3062 
3063     sprintf(fmt, "%%%ds", VNAMELEN-1);
3064     return sscanf(src, fmt, targ);
3065 }
3066 
3067 /**
3068  * gretl_normalize_varname:
3069  * @targ: target string.
3070  * @src: source string.
3071  * @underscore: flag to replace all illegal characters
3072  * with underscore.
3073  * @seq: sequence number in array of names, if applicable.
3074  *
 * Writes a valid gretl identifier to @targ, which must be
3076  * at least #VNAMELEN bytes in length, taking @src as basis
3077  * and replacing any illegal characters as described in the
3078  * documentation for the userland fixname function.
3079  *
3080  * Returns: 1 if any changes were required, 0 if not.
3081  */
3082 
3083 int gretl_normalize_varname (char *targ, const char *src,
3084 			     int underscore, int seq)
3085 {
3086     const char *letters = "abcdefghijklmnopqrstuvwxyz"
3087 	"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
3088     int n, ret = 0, i = 0;
3089 
3090     /* skip any leading non-letters */
3091     n = strcspn(src, letters);
3092     if (n > 0) {
3093 	src += n;
3094 	ret = 1;
3095     }
3096 
3097     while (*src && i < VNAMELEN - 1) {
3098 	if (strspn(src, letters) > 0 || isdigit(*src) || *src == '_') {
3099 	    /* transcribe valid characters */
3100 	    targ[i++] = *src;
3101 	} else {
3102 	    if (*src == ' ' || underscore) {
3103 		/* convert space to underscore */
3104 		if (i > 0 && targ[i-1] == '_') {
3105 		    ; /* skip */
3106 		} else {
3107 		    targ[i++] = '_';
3108 		}
3109 	    }
3110 	    ret = 1;
3111 	}
3112 	src++;
3113     }
3114 
3115     if (i > 0) {
3116 	targ[i] = '\0';
3117     } else {
3118 	if (seq <= 0) {
3119 	    strcpy(targ, "col[n]");
3120 	} else {
3121 	    sprintf(targ, "col%d", seq);
3122 	}
3123 	ret = 1;
3124     }
3125 
3126     return ret;
3127 }
3128 
3129 /**
3130  * gretl_regexp_replace:
3131  * @orig: the original string.
3132  * @match: the pattern to match.
3133  * @repl: the replacement expression for @match.
3134  * @err: location to receive error code.
3135  *
3136  * Builds a string based on @orig but in which all
3137  * occurrences of @match (which is interpreted as a
3138  * regular expression of the Perl type) are replaced
3139  * by means of @repl (also interpreted as a regular
3140  * expression).
3141  *
3142  * Returns: newly allocated string or NULL on failure.
3143  */
3144 
3145 char *gretl_regexp_replace (const char *orig,
3146 			    const char *match,
3147 			    const char *repl,
3148 			    int *err)
3149 {
3150     GRegex *regex;
3151     GError *error = NULL;
3152     char *mod = NULL;
3153 
3154     regex = g_regex_new(match, 0, 0, &error);
3155 
3156     if (error == NULL) {
3157 	mod = g_regex_replace(regex, orig, -1, 0, repl, 0, &error);
3158     }
3159 
3160     if (error != NULL) {
3161 	*err = 1;
3162 	gretl_errmsg_set(error->message);
3163 	g_error_free(error);
3164     }
3165 
3166     if (regex != NULL) {
3167 	g_regex_unref(regex);
3168     }
3169 
3170     return mod;
3171 }
3172 
3173 /**
3174  * gretl_literal_replace:
3175  * @orig: the original string.
3176  * @match: the substring to match.
3177  * @repl: the replacement string for @match.
3178  * @err: location to receive error code.
3179  *
3180  * Builds a string based on @orig but in which all
3181  * occurrences of @match (which is interpreted as a
3182  * straight string literal) are replaced by @repl (also
3183  * a straight string literal).
3184  *
3185  * Returns: newly allocated string or NULL on failure.
3186  */
3187 
3188 char *gretl_literal_replace (const char *orig,
3189 			     const char *match,
3190 			     const char *repl,
3191 			     int *err)
3192 {
3193     char *mod = NULL;
3194     const char *q, *r;
3195     int mlen = strlen(match);
3196     int nrep = 0;
3197 
3198     if (mlen > 0) {
3199 	/* count the occurrences of @match */
3200 	q = orig;
3201 	while ((r = strstr(q, match)) != NULL) {
3202 	    nrep++;
3203 	    q = r + mlen;
3204 	}
3205     }
3206 
3207     if (nrep == 0) {
3208 	/* no replacement needed */
3209 	mod = gretl_strdup(orig);
3210     } else {
3211 	int ldiff = nrep * (strlen(repl) - mlen);
3212 
3213 	mod = malloc(strlen(orig) + ldiff + 1);
3214 	if (mod != NULL) {
3215 	    q = orig;
3216 	    *mod = '\0';
3217 	    while ((r = strstr(q, match)) != NULL) {
3218 		strncat(mod, q, r - q);
3219 		strcat(mod, repl);
3220 		q = r + mlen;
3221 	    }
3222 	    if (*q) {
3223 		strcat(mod, q);
3224 	    }
3225 	}
3226     }
3227 
3228     if (mod == NULL) {
3229 	*err = E_ALLOC;
3230     }
3231 
3232     return mod;
3233 }
3234 
3235 /**
3236  * gretl_substring:
3237  * @str: the string to operate on.
3238  * @first: 1-based index of initial character.
3239  * @last: 1-based index of final character, or -1 to go to the end.
3240  * @err: location to receive error code.
3241  *
3242  * Returns: a substring of @str, from @first to @last.
3243  */
3244 
3245 char *gretl_substring (const char *str, int first, int last, int *err)
3246 {
3247     int len, ini, fin, sublen;
3248     char *ret;
3249 
3250     len = g_utf8_strlen(str, -1);
3251     if (last == -1) {
3252 	last = len;
3253     }
3254 
3255     if (first <= 0 || last <= 0) {
3256 	gretl_errmsg_sprintf("Index value %d is out of bounds",
3257 			     first <= 0 ? first : last);
3258 	*err = E_DATA;
3259     }
3260 
3261     ini = (first < 1) ? 1 : ((first > len) ? len : first);
3262     fin = (last < 1) ? 1 : ((last > len) ? len : last);
3263     sublen = (fin >= ini) ? fin - ini + 1 : 0;
3264 
3265     if (sublen == 0) {
3266 	ret = calloc(1, 1);
3267     } else {
3268 	const char *s1;
3269 	int i;
3270 
3271 	for (i=1; i<ini; i++) {
3272 	    str = g_utf8_next_char(str);
3273 	}
3274 	s1 = str;
3275 	for (i=ini; i<=last; i++) {
3276 	    str = g_utf8_next_char(str);
3277 	}
3278 	len = str - s1;
3279 	ret = calloc(len + 1, 1);
3280 	if (ret != NULL) {
3281 	    *ret = '\0';
3282 	    gretl_utf8_strncat(ret, s1, len);
3283 	}
3284     }
3285 
3286     if (ret == NULL) {
3287 	*err = E_ALLOC;
3288     }
3289 
3290     return ret;
3291 }
3292