1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /* strutils.c for gretl */
21
22 #include "libgretl.h"
23
24 #include <errno.h>
25 #include <time.h>
26 #include <glib.h>
27
28 /**
29 * SECTION:strutils
30 * @short_description: miscellaneous string-handling utilities
31 * @title: Strings
32 * @include: libgretl.h
33 *
34 * Various functions for creating, testing and manipulating
35 * strings and arrays of strings.
36 */
37
38 /**
39 * string_is_blank:
40 * @s: the string to examine.
41 *
42 * Returns: 1 if the string is NULL, of length zero, or contains
43 * nothing but space characters, otherwise returns 0.
44 **/
45
string_is_blank(const char * s)46 int string_is_blank (const char *s)
47 {
48 int ret = 1;
49
50 if (s != NULL) {
51 while (*s) {
52 if (!isspace((unsigned char) *s) &&
53 *s != '\r' && *s != CTRLZ) {
54 ret = 0;
55 break;
56 }
57 s++;
58 }
59 }
60
61 return ret;
62 }
63
/* Decimal-point character consulted by dot_atof(); the value 0
   means "not yet determined", in which case dot_atof() looks it
   up via localeconv() */
static int atof_point;

/* Record @c as the decimal-point character for dot_atof() */
void set_atof_point (char c)
{
    atof_point = (int) c;
}
70
/**
 * dot_atof:
 * @s: the string to convert.
 *
 * Returns: the double-precision numeric interpretation of @s,
 * where the decimal point character is forced to be '.',
 * regardless of the current locale.
 **/

double dot_atof (const char *s)
{
#ifdef ENABLE_NLS
    double val;

    if (atof_point == 0) {
        /* not yet known: ask the current locale */
        atof_point = *localeconv()->decimal_point;
    }

    if (atof_point != '.') {
        /* switch temporarily to the "C" numeric locale */
        gretl_push_c_numeric_locale();
        val = atof(s);
        gretl_pop_c_numeric_locale();
    } else {
        val = atof(s);
    }

    return val;
#else
    return atof(s);
#endif
}
104
/**
 * gretl_dotpos:
 * @str: the string to examine.
 *
 * Scans @str backwards looking for a "." that occurs after the
 * last (backward or forward) slash. The first byte of @str is
 * never inspected, so a leading dot does not count.
 *
 * Returns: the integer position of the dot if one is found,
 * otherwise strlen(@str); 0 if @str is NULL or empty.
 */

int gretl_dotpos (const char *str)
{
    int len, k;

    if (str == NULL || *str == '\0') {
        return 0;
    }

    len = strlen(str);

    for (k = len - 1; k > 0; k--) {
        char c = str[k];

        if (c == '.') {
            return k;
        }
        if (c == '/' || c == '\\') {
            /* hit a path separator before any dot */
            break;
        }
    }

    return len;
}
132
133 /**
134 * gretl_slashpos:
135 * @str: the string to examine.
136 *
137 * Returns: the integer position of the last #SLASH within @str,
138 * or 0 in case a #SLASH is not found.
139 */
140
gretl_slashpos(const char * str)141 int gretl_slashpos (const char *str)
142 {
143 int i, p = 0;
144
145 if (str != NULL && *str != '\0') {
146 p = strlen(str);
147 for (i=p-1; i>0; i--) {
148 #ifdef WIN32
149 if (str[i] == '\\' || str[i] == '/') {
150 #else
151 if (str[i] == SLASH) {
152 #endif
153 p = i;
154 break;
155 }
156 }
157 }
158
159 return p;
160 }
161
/**
 * strrslash:
 * @s: the string to examine.
 *
 * Returns: a pointer to the last occurrence of 'SLASH'
 * in the string @s, making allowance for the fact that
 * on MS Windows this may be either a backslash or a
 * forward slash, or NULL if no 'SLASH' is found.
 */

char *strrslash (const char *s)
{
#ifdef WIN32
    char *bslash = strrchr(s, '\\');
    char *fslash = strrchr(s, '/');

    if (bslash == NULL) {
        /* forward slash or nothing */
        return fslash;
    }
    if (fslash == NULL) {
        return bslash;
    }

    /* both kinds present: take whichever comes later */
    return (fslash > bslash) ? fslash : bslash;
#else
    return strrchr(s, '/');
#endif
}
191
/**
 * gretl_delchar:
 * @c: the character to delete.
 * @str: the string from which to delete @c.
 *
 * Deletes all instances of @c within @str, compacting the
 * remaining characters in place.
 *
 * Returns: the possibly modified string.
 */

char *gretl_delchar (int c, char *str)
{
    const char *rd;
    char *wr = str;

    for (rd = str; *rd != '\0'; rd++) {
        if (*rd != c) {
            /* keep this one */
            *wr++ = *rd;
        }
    }

    *wr = '\0';

    return str;
}
216
/**
 * gretl_delete:
 * @str: the string to process.
 * @idx: the starting point for deleting characters.
 * @count: the number of characters to delete.
 *
 * Deletes @count characters from @str, starting at position @idx.
 * If the requested range does not lie wholly within @str (or
 * @idx is negative, or @count is not positive) the string is
 * left unchanged.
 *
 * Returns: the (possibly modified) string.
 */

char *gretl_delete (char *str, int idx, int count)
{
    size_t len, start, ndel;

    if (str == NULL || idx < 0 || count <= 0) {
        return str;
    }

    len = strlen(str);
    start = (size_t) idx;
    ndel = (size_t) count;

    /* written so that no unsigned subtraction can wrap around */
    if (start >= len || ndel > len - start) {
        return str;
    }

    /* move the tail, terminating NUL included, down over the gap */
    memmove(str + start, str + start + ndel, len - start - ndel + 1);

    return str;
}
238
239 /**
240 * gretl_unquote:
241 * @str: the string to process.
242 * @err: location to receive error code.
243 *
244 * If @str begins with the ASCII double-quote character, checks
245 * that the last character is also a double-quote, and in that
246 * case trims the quotes from both ends. If the first character
247 * is a double quote but the last is not, flags an error. If
248 * the string is not quoted at all, returns the original
249 * string.
250 *
251 * Returns: the input string, possibly modified in place.
252 */
253
254 char *gretl_unquote (char *str, int *err)
255 {
256 *err = 0;
257
258 if (*str == '"') {
259 int n = strlen(str);
260
261 if (n > 1) {
262 if (str[n-1] == '"') {
263 str[n-1] = '\0';
264 } else {
265 *err = E_PARSE;
266 }
267 } else {
268 *err = E_PARSE;
269 }
270
271 if (!*err) {
272 shift_string_left(str, 1);
273 }
274 }
275
276 return str;
277 }
278
/**
 * gretl_charpos:
 * @c: the character to look for.
 * @s: the string to examine.
 *
 * Returns: the first (0-based) position of @c in @s, or -1
 * if @c is not found. The terminating NUL byte is never
 * matched.
 */

int gretl_charpos (char c, const char *s)
{
    int pos;

    for (pos = 0; s[pos] != '\0'; pos++) {
        if (s[pos] == c) {
            return pos;
        }
    }

    return -1;
}
301
/**
 * gretl_charsub:
 * @str: the string to operate on.
 * @find: the character to replace.
 * @repl: the replacement character.
 *
 * Replaces all occurrences of @find with @repl in @str,
 * working in place.
 *
 * Returns: the (possibly modified) string.
 */

char *gretl_charsub (char *str, char find, char repl)
{
    size_t k;

    for (k = 0; str[k] != '\0'; k++) {
        if (str[k] == find) {
            str[k] = repl;
        }
    }

    return str;
}
326
/**
 * comma_separate_numbers:
 * @s: the string to operate on.
 *
 * Given a string which contains two or more numbers
 * separated by spaces and/or commas, revise the
 * string to ensure that all the numbers are comma-separated.
 * This is done in place: where a run of spaces that precedes
 * a number contains no comma, the first byte of the run is
 * overwritten with a comma.
 *
 * Returns: the (possibly modified) string.
 */

char *comma_separate_numbers (char *s)
{
    static const char numstart[] = "+-.0123456789";
    char *cur = s;

    while (*cur != '\0') {
        size_t gap = strspn(cur, " ,");

        if (gap == 0) {
            /* not at a separator: move on one byte */
            cur++;
            continue;
        }

        /* the explicit NUL test matters: strchr() would "find"
           the terminator of numstart */
        if (cur[gap] != '\0' && strchr(numstart, cur[gap]) != NULL &&
            memchr(cur, ',', gap) == NULL) {
            *cur = ',';
        }

        cur += gap;
    }

    return s;
}
362
/**
 * has_suffix:
 * @str: the string to check.
 * @sfx: the suffix to check for, including the leading '.'
 *
 * Compares the tail of @str with @sfx, ignoring case. Compound
 * suffixes such as ".csv.gz" are handled like any other.
 *
 * Returns: 1 if @str ends with @sfx (on a case-insensitive
 * comparison), 0 otherwise; also 0 if either argument is NULL.
 */

int has_suffix (const char *str, const char *sfx)
{
    const char *tail;
    size_t slen, xlen, i;

    if (str == NULL || sfx == NULL) {
        return 0;
    }

    slen = strlen(str);
    xlen = strlen(sfx);

    if (xlen > slen) {
        return 0;
    }

    tail = str + (slen - xlen);

    for (i = 0; i < xlen; i++) {
        /* cast: <ctype.h> functions require unsigned char values */
        if (tolower((unsigned char) tail[i]) !=
            tolower((unsigned char) sfx[i])) {
            return 0;
        }
    }

    return 1;
}
400
/**
 * has_native_data_suffix:
 * @fname: the filename to check.
 *
 * Looks at whatever follows the last '.' in @fname. The
 * recognized extensions are "gdt" and "gdtb", in all-lower
 * or all-upper case.
 *
 * Returns: 1 if @fname ends with a suffix indicating it is a
 * native gretl data file, 0 otherwise.
 */

int has_native_data_suffix (const char *fname)
{
    static const char *native[] = { "gdt", "gdtb", "GDT", "GDTB" };
    const char *dot;
    size_t k;

    if (fname == NULL) {
        return 0;
    }

    dot = strrchr(fname, '.');
    if (dot == NULL) {
        return 0;
    }

    for (k = 0; k < sizeof native / sizeof native[0]; k++) {
        if (strcmp(dot + 1, native[k]) == 0) {
            return 1;
        }
    }

    return 0;
}
425
/**
 * numeric_string:
 * @str: the string to examine.
 *
 * Tests whether the whole of @str can be converted by strtod()
 * (in the "C" numeric locale) without a range error. The
 * strings "inf" and "nan", in any mix of case, are rejected.
 *
 * Returns: 1 if the given @str is numeric, otherwise 0.
 */

int numeric_string (const char *str)
{
    char *endp;

    if (str == NULL || *str == '\0') {
        return 0;
    }

    if (strlen(str) == 3) {
        char lc[4];

        strcpy(lc, str);
        gretl_lower(lc);
        /* could be variable names: they are not valid numbers */
        if (strcmp(lc, "inf") == 0 || strcmp(lc, "nan") == 0) {
            return 0;
        }
    }

    gretl_push_c_numeric_locale();
    errno = 0;
    strtod(str, &endp);
    gretl_pop_c_numeric_locale();

    /* OK only if everything was consumed and nothing overflowed */
    return (*endp == '\0' && errno != ERANGE) ? 1 : 0;
}
464
/**
 * integer_string:
 * @str: the string to examine.
 *
 * Tests whether the whole of @str can be converted by
 * strtol(), base 10, without any error being flagged
 * via errno.
 *
 * Returns: 1 if the given @str represents an integer, otherwise 0.
 */

int integer_string (const char *str)
{
    char *endp = NULL;

    if (str == NULL || *str == '\0') {
        return 0;
    }

    errno = 0;
    (void) strtol(str, &endp, 10);

    return (*endp == '\0' && errno == 0) ? 1 : 0;
}
489
/**
 * ends_with_backslash:
 * @s: the string to examine.
 *
 * Returns: 1 if the last non-space character in @s is a backslash,
 * otherwise 0 (including the case where @s is empty or contains
 * nothing but white space).
 */

int ends_with_backslash (const char *s)
{
    size_t k = strlen(s);

    /* step back over any trailing white space */
    while (k > 0 && isspace((unsigned char) s[k-1])) {
        k--;
    }

    return (k > 0 && s[k-1] == '\\') ? 1 : 0;
}
514
/**
 * gretl_lower:
 * @str: the string to transform.
 *
 * Converts any upper case characters in @str to lower case,
 * working in place.
 *
 * Returns: the possibly modified string.
 */

char *gretl_lower (char *str)
{
    size_t k;

    for (k = 0; str[k] != '\0'; k++) {
        if (isupper((unsigned char) str[k])) {
            str[k] = tolower(str[k]);
        }
    }

    return str;
}
537
/**
 * gretl_strdup:
 * @src: the string to duplicate.
 *
 * Returns: an allocated copy of @src, or NULL on error
 * (or if @src is NULL). The caller is responsible for
 * freeing the copy.
 */

char *gretl_strdup (const char *src)
{
    size_t nbytes;
    char *copy;

    if (src == NULL) {
        return NULL;
    }

    /* size includes the terminating NUL */
    nbytes = strlen(src) + 1;
    copy = malloc(nbytes);

    if (copy != NULL) {
        memcpy(copy, src, nbytes);
    }

    return copy;
}
560
/**
 * gretl_strndup:
 * @src: the string to be copied.
 * @n: the maximum number of characters to copy.
 *
 * No more than the first @n bytes of @src are inspected, so
 * @src need not be NUL-terminated provided it holds at least
 * @n bytes, and the cost does not depend on how much text
 * follows the portion wanted.
 *
 * Returns: an allocated, NUL-terminated copy of at most @n
 * characters from @src, or NULL on error (or if @src is NULL).
 */

char *gretl_strndup (const char *src, size_t n)
{
    const char *nul;
    char *targ;
    size_t len;

    if (src == NULL) {
        return NULL;
    }

    /* bounded search for the terminator, in place of strlen() */
    nul = memchr(src, '\0', n);
    len = (nul != NULL) ? (size_t) (nul - src) : n;

    targ = malloc(len + 1);
    if (targ != NULL) {
        memcpy(targ, src, len);
        targ[len] = '\0';
    }

    return targ;
}
590
/**
 * gretl_strdup_printf:
 * @format: as in printf().
 * @Varargs: arguments to be printed.
 *
 * Print the arguments according to @format into a newly
 * allocated buffer that is sized to fit the output.
 *
 * Returns: allocated result of the printing, or NULL on failure.
 * The caller is responsible for freeing the result.
 */

char *gretl_strdup_printf (const char *format, ...)
{
    va_list args;
    char *buf = NULL;
    int len;

#ifdef HAVE_VASPRINTF
    va_start(args, format);
    len = vasprintf(&buf, format, args);
    va_end(args);
    if (len < 0) {
        /* content of buf is not defined on failure */
        buf = NULL;
    }
#else
    /* first pass: find out how much space is needed */
    va_start(args, format);
    len = vsnprintf(NULL, 0, format, args);
    va_end(args);

    if (len < 0) {
        return NULL;
    }

    buf = malloc((size_t) len + 1);
    if (buf == NULL) {
        return NULL;
    }

    /* second pass: do the actual printing */
    va_start(args, format);
    len = vsnprintf(buf, (size_t) len + 1, format, args);
    va_end(args);

    if (len < 0) {
        free(buf);
        buf = NULL;
    }
#endif

    return buf;
}
636
/**
 * gretl_str_expand:
 * @orig: pointer to the base string.
 * @add: the string to be added.
 * @sep: string to be interpolated, or NULL.
 *
 * Creates a newly allocated string built by concatenating
 * @orig and @add, with @sep interpolated unless @sep is
 * NULL, and replaces the content of @orig with the new string.
 * As a special case, if @orig is NULL, or if the content of
 * @orig is NULL, we just return a duplicate of @add (in this
 * case nothing is written to @orig). If @add is NULL we
 * return NULL without touching @orig.
 *
 * Returns: the reallocated string, or NULL on failure. If
 * the reallocation fails the content of @orig is freed and
 * set to NULL, to avoid memory leakage.
 */

char *gretl_str_expand (char **orig, const char *add, const char *sep)
{
    char *grown;
    int need;

    if (add == NULL) {
        return NULL;
    }

    if (orig == NULL || *orig == NULL) {
        return gretl_strdup(add);
    }

    /* bytes required, terminating NUL included */
    need = strlen(*orig) + strlen(add) + 1;
    if (sep != NULL) {
        need += strlen(sep);
    }

    grown = realloc(*orig, need);
    if (grown == NULL) {
        free(*orig);
        *orig = NULL;
        return NULL;
    }

    if (sep != NULL) {
        strcat(grown, sep);
    }
    strcat(grown, add);

    *orig = grown;

    return grown;
}
688
689 static int is_word_char (const char *s, int i, gretlopt opt)
690 {
691 unsigned char c = *s;
692
693 if (isalnum(c) || c == '_') {
694 return 1;
695 } else if ((opt & OPT_D) && c == '.') {
696 return 1;
697 } else if (opt & OPT_U) {
698 /* allow Greeks */
699 if ((c == 0xCE || c == 0xCF) && *(s+1)) {
700 return 1;
701 } else if (i > 0) {
702 unsigned char prev = *(s-1);
703
704 return (prev == 0xCE && c >= 0x91 && c <= 0xBF) ||
705 (prev == 0xCF && c >= 0x80 && c <= 0x89);
706 }
707 }
708
709 return 0;
710 }
711
712 /**
713 * gretl_word_strdup:
714 * @src: the source string.
715 * @ptr: location to receive end of word pointer, or NULL.
716 * @opt: can include OPT_S for "strict" operation: in this
717 * case an error is flagged if @src contains any characters
718 * other than 'word' characters (see below), comma and space.
719 * Also may include OPT_D to allow dot as a "word" character,
720 * OPT_U to accept UTF-8 Greek letters.
721 * @err: location to receive error code.
722 *
723 * Copies the first 'word' found in @src, where a word
724 * is defined as consisting of alphanumeric characters
725 * and the underscore. If @ptr is not NULL, on exit it
726 * points at the next position in @src after the copied
727 * word.
728 *
729 * Returns: the allocated word or NULL in case no word is
730 * found, or on error.
731 */
732
733 char *gretl_word_strdup (const char *src, const char **ptr,
734 gretlopt opt, int *err)
735 {
736 char *targ = NULL;
737
738 if (src == NULL) {
739 if (ptr != NULL) {
740 *ptr = NULL;
741 }
742 } else if (*src == '\0') {
743 if (ptr != NULL) {
744 *ptr = src;
745 }
746 } else {
747 const char *p;
748 int len = 0;
749 int i = 0;
750
751 if (opt & OPT_S) {
752 /* strict: check for any junk */
753 while (*src && (*src == ' ' || *src == ',')) {
754 src++; i++;
755 }
756 if (*src && !is_word_char(src, 0, opt)) {
757 gretl_errmsg_sprintf(_("Unexpected symbol '%c'"), *src);
758 *err = E_PARSE;
759 return NULL;
760 }
761 } else {
762 /* just skip to first 'word char' */
763 while (*src && !is_word_char(src, i, opt)) {
764 src++; i++;
765 }
766 }
767
768 /* set to start of 'word' */
769 p = src;
770
771 while (is_word_char(src, i, opt)) {
772 len++;
773 src++;
774 i++;
775 }
776
777 if (opt & OPT_S) {
778 if (*src != '\0' && *src != ' ' && *src != ',') {
779 gretl_errmsg_sprintf(_("Unexpected symbol '%c'"), *src);
780 *err = E_PARSE;
781 return NULL;
782 }
783 }
784
785 if (ptr != NULL) {
786 *ptr = src;
787 }
788
789 if (len > 0) {
790 targ = gretl_strndup(p, len);
791 if (targ == NULL) {
792 *err = E_ALLOC;
793 }
794 }
795 }
796
797 return targ;
798 }
799
/**
 * gretl_quoted_string_strdup:
 * @s: the source string.
 * @ptr: location to receive end pointer, or NULL.
 *
 * If @s starts with a quote (double or single), return a copy of
 * the portion of @s that is enclosed in quotes. That is,
 * from @s + 1 up to but not including the next matching quote
 * that is not immediately preceded by a backslash.
 * If @ptr is not NULL, on output it receives a pointer to
 * the next byte in @s after the closing quote, or NULL if
 * @s does not start with a quote or no closing quote is found.
 *
 * Returns: the allocated string or NULL on failure.
 */

char *gretl_quoted_string_strdup (const char *s, const char **ptr)
{
    const char *body, *close;
    char quote;

    if (s == NULL || (*s != '"' && *s != '\'')) {
        /* no opening quote */
        if (ptr != NULL) {
            *ptr = NULL;
        }
        return NULL;
    }

    quote = *s;
    body = s + 1;

    /* look for a non-escaped matching quote */
    for (close = body; *close != '\0'; close++) {
        if (*close == quote && close[-1] != '\\') {
            break;
        }
    }

    if (*close == '\0') {
        /* ran off the end without finding it */
        if (ptr != NULL) {
            *ptr = NULL;
        }
        return NULL;
    }

    if (ptr != NULL) {
        *ptr = close + 1;
    }

    return gretl_strndup(body, close - body);
}
851
852 /* variant of gretl_string_split() that respects
853 empty fields, including them in the output array
854 */
855
856 static char **string_split_2 (const char *s, int *n,
857 const char *sep)
858 {
859 char **S = NULL;
860 gchar **tmp;
861 gchar *mysep;
862 int i, m = 0;
863
864 *n = 0;
865
866 if (sep[0] == '\t' && sep[1] == '\0') {
867 mysep = g_strdup(sep);
868 } else {
869 mysep = g_strstrip(g_strdup(sep));
870 }
871
872 tmp = g_strsplit(s, mysep, -1);
873 if (tmp != NULL) {
874 for (i=0; tmp[i]; i++) {
875 m++;
876 }
877 if (m > 0) {
878 S = strings_array_new(m);
879 if (S != NULL) {
880 for (i=0; i<m; i++) {
881 S[i] = gretl_strdup(g_strstrip(tmp[i]));
882 }
883 }
884 }
885 g_strfreev(tmp);
886 }
887
888 g_free(mysep);
889 *n = m;
890
891 return S;
892 }
893
/* Re. the separator given to gretl_string_split():
   if it contains anything other than whitespace
   characters we'll respect empty fields. A separator
   consisting of a single tab is also treated this way.
   Returns 1 if empty fields should be respected, else 0.
*/

static int respect_empty_fields (const char *s)
{
    if (s[0] == '\t' && s[1] == '\0') {
        /* treat single tab as "true" separator string */
        return 1;
    }

    for ( ; *s != '\0'; s++) {
        /* cast: isspace() is not defined for negative values */
        if (!isspace((unsigned char) *s)) {
            return 1;
        }
    }

    return 0;
}
914
/**
 * gretl_string_split:
 * @s: the source string.
 * @n: location to receive the number of substrings.
 * @sep: string containing the character(s) to count as
 * field separators, or NULL. If @sep is NULL only the
 * space character counts.
 *
 * Parses @s into a set of zero or more substrings and
 * creates an array of those substrings. On successful exit
 * @n holds the number of substrings. If @sep is a single
 * tab, or contains anything other than white space, empty
 * fields are respected and included in the array.
 *
 * Returns: the allocated array or NULL in case of failure
 * (or if no fields are found).
 */

char **gretl_string_split (const char *s, int *n,
                           const char *sep)
{
    char **S;
    int nf, j;

    *n = 0;

    if (s == NULL) {
        return NULL;
    }

    if (sep == NULL) {
        sep = " ";
    } else if (respect_empty_fields(sep)) {
        return string_split_2(s, n, sep);
    }

    nf = count_fields(s, sep);
    if (nf == 0) {
        return NULL;
    }

    S = strings_array_new(nf);
    if (S == NULL) {
        return NULL;
    }

    for (j = 0; j < nf; j++) {
        char *field;
        int flen;

        /* skip separator(s), then measure the field */
        s += strspn(s, sep);
        flen = strcspn(s, sep);
        field = gretl_strndup(s, flen);
        if (field == NULL) {
            strings_array_free(S, nf);
            return NULL;
        }
        S[j] = field;
        s += flen;
    }

    *n = nf;

    return S;
}
974
975 /**
976 * gretl_string_split_lines:
977 * @s: the source string.
978 * @n: location to receive the number of substrings.
979 *
980 * Parses @s into a set of zero or more substrings, one per
981 * complete line of @s, and creates an array of those substrings.
982 * On sucessful exit @n holds the number of substrings.
983 *
984 * Returns: the allocated array or NULL in case of failure.
985 */
986
987 char **gretl_string_split_lines (const char *s, int *n)
988 {
989 const char *p = s;
990 int i, len, m = 0;
991 int err = 0;
992 char **S = NULL;
993
994 *n = 0;
995
996 while (*p) {
997 if (*p == '\n') {
998 m++;
999 }
1000 p++;
1001 }
1002
1003 if (m == 0) {
1004 return NULL;
1005 }
1006
1007 S = strings_array_new(m);
1008 if (S == NULL) {
1009 return NULL;
1010 }
1011
1012 p = s;
1013 i = 0;
1014
1015 while (*p && i < m) {
1016 len = strcspn(p, "\r\n");
1017 S[i] = gretl_strndup(p, len);
1018 if (S[i] == NULL) {
1019 err = E_ALLOC;
1020 break;
1021 }
1022 gretl_strstrip(S[i]);
1023 i++;
1024 p += len;
1025 if (*p == '\r') p++;
1026 if (*p == '\n') p++;
1027 if (*p == '\0') {
1028 break;
1029 }
1030 }
1031
1032 if (err) {
1033 strings_array_free(S, m);
1034 S = NULL;
1035 } else {
1036 *n = m;
1037 }
1038
1039 return S;
1040 }
1041
1042 /**
1043 * gretl_string_split_quoted:
1044 * @s: the source string.
1045 * @n: location to receive the number of substrings.
1046 * @sep: string containing the character(s) to count as
1047 * field separators, or NULL. If @sep is NULL only space,
1048 * tab and newline count.
1049 * @err: location to receive error code.
1050 *
1051 * Similar to gretl_string_split(), except that this variant
1052 * allows for the presence of double-quoted substrings
1053 * which may contain spaces. The quotes are removed in the
1054 * members of the returned array.
1055 *
1056 * Returns: allocated array of substrings or NULL in case of failure.
1057 */
1058
1059 char **gretl_string_split_quoted (const char *s, int *n,
1060 const char *sep, int *err)
1061 {
1062 const char *ignore;
1063 const char *q, *p = s;
1064 int i, len, m = 0;
1065 int grabit, quoted;
1066 char *substr;
1067 char **S;
1068
1069 *err = 0;
1070 ignore = sep != NULL ? sep : " \t\n";
1071
1072 *n = 0;
1073
1074 while (*p) {
1075 p += strspn(p, ignore);
1076 if (*p == '"') {
1077 /* quoted substring */
1078 m++;
1079 q = strchr(p + 1, '"');
1080 if (q == NULL) {
1081 *err = E_PARSE;
1082 return NULL;
1083 }
1084 p = q;
1085 } else {
1086 len = strcspn(p, ignore);
1087 if (len > 0) {
1088 /* unquoted substring */
1089 m++;
1090 p += len - 1;
1091 }
1092 }
1093 if (*p == '\0') {
1094 break;
1095 }
1096 p++;
1097 }
1098
1099 if (*err || m == 0) {
1100 return NULL;
1101 }
1102
1103 S = strings_array_new(m);
1104 if (S == NULL) {
1105 *err = E_ALLOC;
1106 return NULL;
1107 }
1108
1109 p = s;
1110 i = 0;
1111
1112 while (*p && i < m) {
1113 grabit = quoted = 0;
1114 p += strspn(p, ignore);
1115 if (*p == '"') {
1116 grabit = quoted = 1;
1117 p++;
1118 len = strcspn(p, "\"");
1119 } else {
1120 len = strcspn(p, ignore);
1121 grabit = (len > 0);
1122 }
1123 if (grabit) {
1124 substr = gretl_strndup(p, len);
1125 if (substr == NULL) {
1126 *err = E_ALLOC;
1127 strings_array_free(S, m);
1128 return NULL;
1129 }
1130 S[i++] = substr;
1131 p += len + quoted;
1132 }
1133 }
1134
1135 *n = m;
1136
1137 return S;
1138 }
1139
/**
 * gretl_trunc:
 * @str: the string to truncate.
 * @n: the desired length of the truncated string.
 *
 * Truncates the given @str to the specified length; has
 * no effect if @str is not longer than @n bytes.
 *
 * Returns: the possibly truncated string.
 */

char *gretl_trunc (char *str, size_t n)
{
    size_t len = strlen(str);

    if (len > n) {
        str[n] = '\0';
    }

    return str;
}
1158
/* the ASCII characters that may occur in a gretl identifier */
static const char *name_ok =
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789_";

/**
 * is_greek_letter:
 * @s: the string to test.
 *
 * Checks for the case where @s starts with a (two-byte,
 * UTF-8) greek letter, followed by a nul byte or an ascii
 * character that cannot occur in a gretl identifier.
 *
 * Returns: 1 if the case is met, otherwise 0.
 */

int is_greek_letter (const char *s)
{
    unsigned char b0 = s[0];
    unsigned char b1 = (b0 != '\0') ? s[1] : 0;
    int greek;
    char nxt;

    greek = (b0 == 0xCE && b1 >= 0x91 && b1 <= 0xBF) ||
        (b0 == 0xCF && b1 >= 0x80 && b1 <= 0x89);

    if (!greek) {
        return 0;
    }

    /* now see what follows the letter */
    nxt = s[2];

    if (nxt == '\0') {
        return 1;
    }

    return (nxt >= 32 && nxt <= 126 && strchr(name_ok, nxt) == NULL);
}
1198
1199 /**
1200 * gretl_namechar_spn:
1201 * @s: the string to examine.
1202 *
1203 * Returns: the length of the initial segment of @s which
1204 * consists of characters that are valid in a gretl
1205 * variable or object name, namely a-z, A-Z, 0-9 and _,
1206 * starting with a letter, up to a maximum of %VNAMELEN - 1.
1207 */
1208
1209 int gretl_namechar_spn (const char *s)
1210 {
1211 int ret = 0;
1212
1213 if (isalpha(*s)) {
1214 ret = strspn(s, name_ok);
1215 }
1216
1217 if (ret == 0 && is_greek_letter(s)) {
1218 ret = 2;
1219 }
1220
1221 return ret;
1222 }
1223
/**
 * double_quote_position:
 * @s: the source string.
 *
 * A double-quote counts as escaped if it is immediately
 * preceded by an odd number of backslashes.
 *
 * Returns: the 0-based index of the position of the next
 * unescaped double-quote character in @s, or -1 if no
 * such character is found.
 */

int double_quote_position (const char *s)
{
    int run = 0; /* length of the current run of backslashes */
    int i;

    for (i = 0; s[i] != '\0'; i++) {
        if (s[i] == '\\') {
            run++;
            continue;
        }
        if (s[i] == '"' && run % 2 == 0) {
            /* got an unescaped double-quote */
            return i;
        }
        run = 0;
    }

    return -1;
}
1257
/**
 * count_fields:
 * @s: the string to process.
 * @sep: string containing the character(s) to count as
 * field separators, or NULL. If @sep is NULL only the
 * space character counts.
 *
 * A field is a maximal run of characters not found in @sep;
 * consecutive separators do not produce empty fields.
 *
 * Returns: the number of fields in @s (0 if @s is NULL).
 */

int count_fields (const char *s, const char *sep)
{
    int nf = 0;

    if (s == NULL) {
        return 0;
    }

    if (sep == NULL) {
        sep = " ";
    }

    /* step past any leading separator(s) */
    s += strspn(s, sep);

    while (*s != '\0') {
        /* at the start of a field: count it, then move
           past it and the separator(s) that follow */
        nf++;
        s += strcspn(s, sep);
        s += strspn(s, sep);
    }

    return nf;
}
1302
/**
 * count_lines:
 * @s: the string to process.
 *
 * Returns: the number of complete lines (lines ending
 * with the newline character) in @s, or 0 if @s is NULL.
 */

int count_lines (const char *s)
{
    int nl = 0;

    if (s == NULL) {
        return 0;
    }

    /* hop from one newline to the next */
    for (s = strchr(s, '\n'); s != NULL; s = strchr(s + 1, '\n')) {
        nl++;
    }

    return nl;
}
1326
/**
 * shift_string_left:
 * @str: the string to process.
 * @move: the number of places to shift.
 *
 * Shifts the content of @str left by @move places, dropping
 * leading bytes as needed. If @move is not less than the
 * length of @str the result is an empty string.
 *
 * Returns: the modified string.
 */

char *shift_string_left (char *str, size_t move)
{
    size_t len = strlen(str);

    if (move < len) {
        /* the "+ 1" carries the terminating NUL along */
        memmove(str, str + move, len - move + 1);
    } else {
        str[0] = '\0';
    }

    return str;
}
1351
/**
 * gretl_strstrip:
 * @str: the string to process.
 *
 * Removes leading spaces and tabs, and trailing white space
 * of any kind (carriage return included), from a string.
 * The string is modified in place.
 *
 * Returns: the possibly modified string.
 */

char *gretl_strstrip (char *str)
{
    size_t lead = strspn(str, " \t");
    size_t len = strlen(str + lead);

    /* find the length net of trailing white space */
    while (len > 0) {
        /* unsigned char: isspace() is not defined for
           negative values */
        unsigned char c = str[lead + len - 1];

        if (isspace(c) || c == '\r') {
            len--;
        } else {
            break;
        }
    }

    if (lead > 0) {
        /* shift what remains to the start of the string */
        memmove(str, str + lead, len);
    }

    str[len] = '\0';

    return str;
}
1381
1382 /**
1383 * gretl_strstrip_copy:
1384 * @str: the string to process.
1385 *
1386 * Returns: a copy of @str, from which both leading and
1387 * trailing white space have been removed.
1388 */
1389
1390 char *gretl_strstrip_copy (const char *str, int *err)
1391 {
1392 char *ret = NULL;
1393 int i, n;
1394
1395 if (str == NULL) {
1396 *err = E_INVARG;
1397 return NULL;
1398 }
1399
1400 while (isspace(*str)) {
1401 str++;
1402 }
1403
1404 n = strlen(str);
1405
1406 for (i=n-1; i>=0; i--) {
1407 if (isspace(str[i]) || str[i] == '\r') {
1408 n--;
1409 } else {
1410 break;
1411 }
1412 }
1413
1414 ret = gretl_strndup(str, n);
1415 if (ret == NULL) {
1416 *err = E_ALLOC;
1417 }
1418
1419 return ret;
1420 }
1421
/**
 * switch_ext:
 * @targ: the target or output string (must be pre-allocated).
 * @src: the source or input string.
 * @ext: the extension or suffix to attach, without leading dot.
 *
 * For processing filenames: copies @src to @targ, minus any existing
 * filename extension (as located by gretl_dotpos()), and adds to
 * @targ a dot followed by the specified extension. @targ and @src
 * may be the same string.
 *
 * Returns: the output string, @targ.
 */

char *switch_ext (char *targ, const char *src, const char *ext)
{
    int stem = gretl_dotpos(src);

    if (targ != src && stem > 0) {
        /* copy the part that precedes the extension */
        memmove(targ, src, stem);
    }

    targ[stem] = '.';
    strcpy(targ + stem + 1, ext);

    return targ;
}
1449
/**
 * switch_ext_in_place:
 * @fname: must have sufficient space to add the given extension.
 * @ext: the extension or suffix to attach, without dot.
 *
 * For processing filenames: removes any existing dot-extension on
 * @fname (as located by gretl_dotpos()) and appends a dot followed
 * by @ext.
 *
 * Returns: the modified string, @fname.
 */

char *switch_ext_in_place (char *fname, const char *ext)
{
    char *tail = fname + gretl_dotpos(fname);

    /* overwrite from the existing dot (or the end) onwards */
    *tail = '.';
    strcpy(tail + 1, ext);

    return fname;
}
1471
/**
 * switch_ext_new:
 * @src: the original string.
 * @ext: the extension or suffix to attach (without leading '.').
 *
 * For processing filenames: creates a copy of @src in which
 * any existing dot-extension is removed and @ext is appended
 * (with a dot automatically inserted). A dot that is followed
 * by a (backward or forward) slash belongs to a directory name
 * and is not taken to start an extension.
 *
 * Returns: the newly allocated string, or NULL on allocation
 * failure.
 */

char *switch_ext_new (const char *src, const char *ext)
{
    const char *dot = strrchr(src, '.');
    size_t elen = strlen(ext);
    size_t stem;
    char *ret;

    if (dot != NULL && strpbrk(dot, "/\\") != NULL) {
        /* the last dot is within the path, not the filename */
        dot = NULL;
    }

    /* number of bytes of @src to retain */
    stem = (dot != NULL) ? (size_t) (dot - src) : strlen(src);

    /* allow for the dot and the terminating NUL */
    ret = malloc(stem + elen + 2);

    if (ret != NULL) {
        memcpy(ret, src, stem);
        ret[stem] = '.';
        memcpy(ret + stem + 1, ext, elen + 1);
    }

    return ret;
}
1508
/* Scan @s backwards from index @n, stopping before index 1,
   looking for a '#' character that is not within a string
   literal. Returns 1 if one is found, otherwise 0.
*/

static int ends_in_comment (const char *s, int n)
{
    int in_quotes = 0;
    int k = n;

    /* the '#' character is inert (only) if it appears
       within a string literal */

    while (k > 1) {
        char c = s[k--];

        if (c == '"') {
            in_quotes = !in_quotes;
        } else if (c == '#' && !in_quotes) {
            return 1;
        }
    }

    return 0;
}
1526
1527 #define LINE_CONT(c) (c == '\\' || c == ',' || c == '(')
1528
1529 /**
1530 * top_n_tail:
1531 * @str: the string to process.
1532 * @maxlen: maximum length of string, including NUL termination.
1533 * @err: location to receive error code, or NULL.
1534 *
1535 * Drop leading space and trailing space and newline from string,
1536 * then replace a trailing backslash (if any) with a space.
1537 * If @str does not end with a newline within the limit set by
1538 * @maxlen, and @err is not NULL, then E_TOOLONG is written
1539 * to @err.
1540 *
 * Returns: 1 if a trailing backslash, comma, or left
 * parenthesis was found, otherwise 0.
1543 */
1544
1545 int top_n_tail (char *str, size_t maxlen, int *err)
1546 {
1547 int i, n, cont = 0;
1548
1549 if (str == NULL || *str == '\0' || *str == '\n' || *str == '\r') {
1550 return 0;
1551 }
1552
1553 n = strlen(str) - 1;
1554
1555 if (err != NULL && n > maxlen - 2 && str[n] != '\n') {
1556 *err = E_TOOLONG;
1557 }
1558
1559 /* chop any trailing space */
1560 for (i=n; i>=0; i--) {
1561 if (isspace((unsigned char) str[i])) {
1562 str[i] = '\0';
1563 n--;
1564 } else {
1565 break;
1566 }
1567 }
1568
1569 if (*str != '\0') {
1570 /* Drop any leading spaces, also possible questionmark. Try
1571 to catch non-breaking spaces too -- ugh, Windows!
1572 (NBSP is 0xA0 in Windows CP1252)
1573 */
1574 i = 0;
1575 while (isspace((unsigned char) str[i]) ||
1576 str[i] == '?' ||
1577 str[i] == (char) 0xC2 ||
1578 str[i] == (char) 0xA0) {
1579 n--;
1580 i++;
1581 }
1582 if (i > 0) {
1583 shift_string_left(str, i);
1584 }
1585
1586 if (*str == '#' || !strncmp(str, "/*", 2)) {
1587 ; /* the line starts a comment: leave well alone */
1588 } else if (n >= 0 && LINE_CONT(str[n])) {
1589 /* register line continuation characters at the end of
1590 the line, but only if not preceded by the comment
1591 character '#' (unquoted)
1592 */
1593 cont = !ends_in_comment(str, n - 1);
1594 if (cont && str[n] == '\\') {
1595 /* replace backslash */
1596 str[n] = ' ';
1597 }
1598 }
1599 }
1600
1601 return cont;
1602 }
1603
1604 /**
1605 * equation_get_lhs_and_rhs:
1606 * @s: equation in string form.
1607 * @plh: pointer to receive left-hand side expression.
1608 * @prh: pointer to receive right-hand side expression.
1609 *
1610 * Given a string @s, parse it into a left-hand side and a right-hand
1611 * side, separated by an equals sign. Return in @plh and @prh
1612 * allocated copies of the respective sides, with any leading or trailing
1613 * white space trimmed.
1614 *
1615 * Returns: 0 on success, 1 on error.
1616 */
1617
int equation_get_lhs_and_rhs (const char *s, char **plh, char **prh)
{
    const char *eq, *p;
    char *lh = NULL, *rh = NULL;
    int len;

    if (s == NULL || plh == NULL || prh == NULL) {
        return 1;
    }

    *plh = NULL;
    *prh = NULL;

    /* an equals sign is required */
    eq = strchr(s, '=');
    if (eq == NULL) {
        return 1;
    }

    /* left-hand side: skip leading space, then take everything
       up to the first space or '='. The casts to unsigned char
       keep isspace() defined for bytes with the high bit set.
    */
    p = s;
    while (isspace((unsigned char) *p)) {
        p++;
    }
    len = strcspn(p, " =");
    if (len == 0) {
        return 1;
    }
    lh = gretl_strndup(p, len);
    if (lh == NULL) {
        return 1;
    }

    /* right-hand side: everything after '=', trimmed at both ends */
    p = eq + 1;
    while (isspace((unsigned char) *p)) {
        p++;
    }
    len = strlen(p);
    while (len > 0 && isspace((unsigned char) p[len-1])) {
        len--;
    }
    if (len > 0) {
        rh = gretl_strndup(p, len);
    }
    if (rh == NULL) {
        /* empty right-hand side, or allocation failure */
        free(lh);
        return 1;
    }

    *plh = lh;
    *prh = rh;

    return 0;
}
1680
1681 /**
1682 * tailstrip:
1683 * @str: the string to process.
1684 *
1685 * Drop trailing space (and newline if any) from string.
1686 *
1687 * Returns: the modified string.
1688 */
1689
char *tailstrip (char *str)
{
    if (str != NULL) {
        size_t n = strlen(str);

        /* isspace() already covers '\n' and '\r' */
        while (n > 0 && isspace((unsigned char) str[n-1])) {
            str[--n] = '\0';
        }
    }

    return str;
}
1711
1712 /**
1713 * compress_spaces:
1714 * @s: the string to process.
1715 *
1716 * Reduce multiple contiguous space characters to single spaces
1717 * within @s.
1718 *
1719 * Returns: the compressed string.
1720 */
1721
char *compress_spaces (char *s)
{
    const char *rd;   /* next byte to read */
    char *wr;         /* next byte to write */
    int in_quote = 0;

    if (s == NULL || *s == '\0') {
        return s;
    }

    rd = wr = s;

    while (*rd != '\0') {
        char c = *rd++;

        /* a double quote toggles quoting unless backslash-escaped */
        if (c == '"' && (wr == s || wr[-1] != '\\')) {
            in_quote = !in_quote;
        }
        if (!in_quote) {
            if (c == '\t') {
                c = ' '; /* trash tabs */
            }
            if (c == ' ') {
                /* swallow the literal spaces that follow this one */
                while (*rd == ' ') {
                    rd++;
                }
            }
        }
        *wr++ = c;
    }

    *wr = '\0';

    return s;
}
1756
1757 /**
1758 * space_to_score:
1759 * @s: the string to process.
1760 *
1761 * Replace any spaces with underscores in @s.
1762 *
1763 * Returns: the (possibly) modified string.
1764 */
1765
char *space_to_score (char *s)
{
    char *sp;

    /* hop from one space to the next, overwriting each */
    for (sp = strchr(s, ' '); sp != NULL; sp = strchr(sp + 1, ' ')) {
        *sp = '_';
    }

    return s;
}
1777
1778 /**
1779 * strings_array_new:
1780 * @nstrs: number of strings in array.
1781 *
1782 * Allocates storage for @nstrs strings and initializes all
1783 * to NULL.
1784 *
1785 * Returns: the allocated array, or NULL on failure.
1786 */
1787
char **strings_array_new (int nstrs)
{
    char **S = NULL;

    if (nstrs > 0) {
        S = malloc(nstrs * sizeof *S);
    }

    if (S != NULL) {
        int k = nstrs;

        /* every slot starts out empty */
        while (k-- > 0) {
            S[k] = NULL;
        }
    }

    return S;
}
1806
1807 /**
1808 * strings_array_add:
1809 * @pS: pointer to strings array.
1810 * @n: location of present number of strings in array.
1811 * @p: string to add to array.
1812 *
1813 * Allocates storage for an extra member of @S and adds a
1814 * copy of string @p in the last position. On success,
1815 * the content of @n is incremented by 1.
1816 *
1817 * Returns: 0 on success, %E_ALLOC on failure.
1818 */
1819
1820 int strings_array_add (char ***pS, int *n, const char *p)
1821 {
1822 char **Tmp;
1823 int m = *n;
1824
1825 Tmp = realloc(*pS, (m + 1) * sizeof *Tmp);
1826 if (Tmp == NULL) {
1827 return E_ALLOC;
1828 }
1829
1830 *pS = Tmp;
1831
1832 if (p != NULL) {
1833 Tmp[m] = gretl_strdup(p);
1834 if (Tmp[m] == NULL) {
1835 return E_ALLOC;
1836 }
1837 } else {
1838 Tmp[m] = NULL;
1839 }
1840
1841 *n += 1;
1842
1843 return 0;
1844 }
1845
1846 /**
1847 * strings_array_prepend_uniq:
1848 * @pS: pointer to strings array.
1849 * @n: location of present number of strings in array.
1850 * @p: string to prepend to array.
1851 *
1852 * If @p is already present in the array at location @pS,
1853 * moves it into first position if it is not already there.
1854 * Otherwise allocates storage for an extra member of the
1855 * array and pushes @p into first position, in which case
1856 * the content of @n is incremented by 1.
1857 *
 * Returns: 0 on success, %E_DATA if @p is NULL, %E_ALLOC on
 * allocation failure.
1859 */
1860
1861 int strings_array_prepend_uniq (char ***pS, int *n, const char *p)
1862 {
1863 char **S = *pS;
1864 char *s0;
1865 int m = *n;
1866 int i, pos = -1;
1867
1868 if (p == NULL) {
1869 return E_DATA;
1870 }
1871
1872 for (i=0; i<m; i++) {
1873 if (!strcmp(S[i], p)) {
1874 pos = i;
1875 break;
1876 }
1877 }
1878
1879 if (pos == 0) {
1880 /* already present in 1st position */
1881 return 0;
1882 } else if (pos > 0) {
1883 /* already present, not first */
1884 s0 = S[pos];
1885 for (i=pos; i>0; i--) {
1886 S[i] = S[i-1];
1887 }
1888 S[0] = s0;
1889 } else if (pos < 0) {
1890 /* not present */
1891 S = realloc(*pS, (m + 1) * sizeof *S);
1892 if (S == NULL) {
1893 return E_ALLOC;
1894 }
1895 s0 = gretl_strdup(p);
1896 if (s0 == NULL) {
1897 return E_ALLOC;
1898 }
1899 *pS = S;
1900 *n = ++m;
1901 /* shuffle up */
1902 for (i=m; i>0; i--) {
1903 S[i] = S[i-1];
1904 }
1905 /* and insert @p */
1906 S[0] = s0;
1907 }
1908
1909 return 0;
1910 }
1911
1912 /**
1913 * strings_array_donate:
1914 * @pS: pointer to strings array.
1915 * @n: location of present number of strings in array.
1916 * @p: string to append to array.
1917 *
1918 * Allocates storage for an extra member of @S and adds
1919 * string @p in the last position. Unlike strings_array_add(),
 * the array takes ownership of @p rather than copying it.
1921 * On success, the content of @n is incremented by 1.
1922 *
1923 * Returns: 0 on success, %E_ALLOC on failure.
1924 */
1925
1926 int strings_array_donate (char ***pS, int *n, char *p)
1927 {
1928 char **Tmp;
1929 int m = *n;
1930
1931 Tmp = realloc(*pS, (m + 1) * sizeof *Tmp);
1932 if (Tmp == NULL) {
1933 return E_ALLOC;
1934 }
1935
1936 *pS = Tmp;
1937
1938 if (p != NULL) {
1939 Tmp[m] = p;
1940 if (Tmp[m] == NULL) {
1941 return E_ALLOC;
1942 }
1943 } else {
1944 Tmp[m] = NULL;
1945 }
1946
1947 *n += 1;
1948
1949 return 0;
1950 }
1951
1952 /**
1953 * strings_array_add_uniq:
1954 * @pS: pointer to strings array.
1955 * @n: location of present number of strings in array.
1956 * @p: string to test for addition to array.
1957 * @pos: location to receive the position of @p in the
1958 * array (whether already matched or newly added), or NULL.
1959 *
1960 * If the array does not already include a copy of @p,
1961 * allocates storage for an extra member of @pS and adds a
1962 * copy of string @p in the last position. On successful
1963 * addition the content of @n is incremented by 1.
1964 *
1965 * Returns: 0 on success, %E_ALLOC on failure.
1966 */
1967
1968 int strings_array_add_uniq (char ***pS, int *n, const char *p,
1969 int *pos)
1970 {
1971 char **Tmp, **S = *pS;
1972 int m = *n;
1973 int i;
1974
1975 for (i=0; i<m; i++) {
1976 if (S[i] != NULL && strcmp(S[i], p) == 0) {
1977 if (pos != NULL) {
1978 *pos = i;
1979 }
1980 return 0; /* no-op */
1981 }
1982 }
1983
1984 Tmp = realloc(S, (m + 1) * sizeof *Tmp);
1985 if (Tmp == NULL) {
1986 return E_ALLOC;
1987 }
1988
1989 if (p != NULL) {
1990 Tmp[m] = gretl_strdup(p);
1991 if (Tmp[m] == NULL) {
1992 return E_ALLOC;
1993 }
1994 } else {
1995 Tmp[m] = NULL;
1996 }
1997
1998 if (pos != NULL) {
1999 *pos = m;
2000 }
2001
2002 *pS = Tmp;
2003 *n += 1;
2004
2005 return 0;
2006 }
2007
2008 /**
2009 * strings_array_new_with_length:
2010 * @nstrs: number of strings in array.
2011 * @len: number of bytes per string.
2012 *
2013 * Allocates storage for @nstrs strings, each of them
2014 * @len bytes long. The first byte of each string is
2015 * initialized to 0.
2016 *
2017 * Returns: the allocated array, or NULL on failure.
2018 */
2019
char **strings_array_new_with_length (int nstrs, int len)
{
    char **S;
    int i, j;

    /* each string needs at least one byte, for the terminating NUL
       written below */
    if (nstrs <= 0 || len <= 0) {
        return NULL;
    }

    S = malloc(nstrs * sizeof *S);
    if (S == NULL) {
        return NULL;
    }

    for (i=0; i<nstrs; i++) {
        S[i] = malloc(len);
        if (S[i] == NULL) {
            /* undo the allocations made so far */
            for (j=0; j<i; j++) {
                free(S[j]);
            }
            free(S);
            return NULL;
        }
        S[i][0] = '\0';
    }

    return S;
}
2046
2047 /**
2048 * strings_array_realloc_with_length:
2049 * @pS: existing array to reallocate.
2050 * @oldn: original number of strings in the array.
2051 * @newn: new number of strings in array.
2052 * @len: number of bytes per string.
2053 *
2054 * Adjusts the storage in @pS to a size of @newn
2055 * strings, each of them @len bytes long. The first
2056 * byte of any additional strings is initialized to 0.
2057 * This function may be used either to expand or to
2058 * shrink an existing array of strings.
2059 *
2060 * Returns: the new array, or NULL on failure.
2061 */
2062
char **strings_array_realloc_with_length (char ***pS,
                                          int oldn,
                                          int newn,
                                          int len)
{
    char **S;
    int i;

    if (pS == NULL) {
        return NULL;
    }

    if (newn == oldn) {
        /* nothing to do */
        return *pS;
    }

    if (newn <= 0) {
        /* shrinking to nothing: release the lot */
        strings_array_free(*pS, oldn);
        *pS = NULL;
        return NULL;
    }

    /* when shrinking, release the surplus strings first */
    i = newn;
    while (i < oldn) {
        free((*pS)[i]);
        (*pS)[i] = NULL;
        i++;
    }

    S = realloc(*pS, newn * sizeof *S);
    if (S == NULL) {
        strings_array_free(*pS, oldn);
        *pS = NULL;
        return NULL;
    }

    *pS = S;

    /* when expanding, allocate the additional strings */
    for (i=oldn; i<newn; i++) {
        S[i] = malloc(len);
        if (S[i] == NULL) {
            /* free every string below the failed slot, then the array */
            while (i-- > 0) {
                free(S[i]);
            }
            free(S);
            *pS = NULL;
            return NULL;
        }
        S[i][0] = '\0';
    }

    return S;
}
2118
2119 /**
2120 * strings_array_dup:
2121 * @strs: array of strings to be copied.
2122 * @n: number of strings in array.
2123 *
2124 * Returns: an allocated copy of @strs, or NULL on failure.
2125 */
2126
char **strings_array_dup (char **strs, int n)
{
    char **copy;
    int i;

    if (strs == NULL || n <= 0) {
        return NULL;
    }

    copy = malloc(n * sizeof *copy);
    if (copy == NULL) {
        return NULL;
    }

    for (i=0; i<n; i++) {
        if (strs[i] == NULL) {
            /* empty slots are carried over as such */
            copy[i] = NULL;
            continue;
        }
        copy[i] = gretl_strdup(strs[i]);
        if (copy[i] == NULL) {
            /* release what has been copied so far */
            while (i-- > 0) {
                free(copy[i]);
            }
            free(copy);
            return NULL;
        }
    }

    return copy;
}
2156
/* qsort() callback: @a and @b each point to a string pointer */
static int compare_strings (const void *a, const void *b)
{
    const char *left = *(const char * const *) a;
    const char *right = *(const char * const *) b;

    return strcmp(left, right);
}
2164
2165 /**
2166 * strings_array_sort:
2167 * @pS: location of array of strings.
2168 * @n: location of the number of strings in the array.
2169 * @opt: may contain %OPT_U to trim the sorted array
2170 * so that it contains only unique entries.
2171 *
2172 * Sorts an array of strings in ascending lexicographical
2173 * order. If %OPT_U is given, @n holds the number of unique
2174 * strings on exit. It is assumed that storage for the
2175 * strings array was obtained via strings_array_new() or
2176 * a similar libgretl function.
2177 *
2178 * Returns: 0 on success, non-zero on error.
2179 */
2180
2181 int strings_array_sort (char ***pS, int *n, gretlopt opt)
2182 {
2183 char **S;
2184 int ns;
2185
2186 if (pS == NULL || n == NULL) {
2187 return E_DATA;
2188 }
2189
2190 S = *pS;
2191 ns = *n;
2192
2193 qsort(S, ns, sizeof *S, compare_strings);
2194
2195 if (opt & OPT_U) {
2196 int i, j, m = ns;
2197
2198 for (i=0; i<m-1; i++) {
2199 if (!strcmp(S[i], S[i+1])) {
2200 free(S[i+1]);
2201 for (j=i+1; j<m-1; j++) {
2202 S[j] = S[j+1];
2203 }
2204 S[m-1] = NULL;
2205 i--;
2206 m--;
2207 }
2208 }
2209 if (m < ns) {
2210 char **tmp = realloc(S, m * sizeof *S);
2211
2212 if (tmp != NULL) {
2213 *pS = tmp;
2214 }
2215 *n = m;
2216 }
2217 }
2218
2219 return 0;
2220 }
2221
2222 /**
2223 * strings_array_cmp:
2224 * @strs1: first array of strings.
2225 * @strs2: second array of strings.
2226 * @n: number of strings to examine.
2227 *
2228 * Compares for equality two arrays of strings, each of
2229 * which must contain at least @n elements. Equality
2230 * of the arrays means that strcmp returns 0 for
2231 * each pair of strings @strs1[i], @strs2[i], for i
2232 * equals 0 to @n - 1.
2233 *
2234 * Returns: 0 if the arrays compare equal, non-zero
2235 * otherwise.
2236 */
2237
int strings_array_cmp (char **strs1, char **strs2, int n)
{
    int i;

    /* stop at the first pair that differs and report its strcmp value */
    for (i=0; i<n; i++) {
        int d = strcmp(strs1[i], strs2[i]);

        if (d != 0) {
            return d;
        }
    }

    return 0;
}
2248
2249 /**
2250 * strings_array_position:
2251 * @strs: array of strings.
2252 * @n: number of elements in @strs.
2253 * @s: string to test.
2254 *
2255 * Returns: the 0-based position of the first member of @strs
2256 * to compare equal to @s, or -1 if no match is found.
2257 */
2258
int strings_array_position (char **strs, int n, const char *s)
{
    int i;

    if (s == NULL) {
        return -1;
    }

    /* first match wins; NULL members never match */
    for (i=0; i<n; i++) {
        if (strs[i] != NULL && strcmp(strs[i], s) == 0) {
            return i;
        }
    }

    return -1;
}
2273
2274 /**
2275 * strings_array_diff:
2276 * @strs1: first array of strings.
2277 * @n1: number of strings in @strs1.
2278 * @strs2: second array of strings.
2279 * @n2: number of strings in @strs2.
2280 * @extra: location to receive array of strings present
2281 * in @strs2 but not in @strs1 (or NULL).
 * @n_extra: location to receive number of strings in
2283 * the @extra array (or NULL).
2284 *
2285 * Fills out @extra with any strings present in @strs2
2286 * but not in @strs1.
2287 *
2288 * Returns: 0 on success, non-zero otherwise.
2289 */
2290
int strings_array_diff (char **strs1, int n1,
                        char **strs2, int n2,
                        char ***extra, int *n_extra)
{
    char **S = NULL;
    int i, j, n = 0;
    int err = 0;

    if (extra == NULL || n_extra == NULL) {
        /* nowhere to put the result: nothing to do */
        return 0;
    }

    for (j=0; j<n2 && !err; j++) {
        int found = 0;

        for (i=0; i<n1 && !found; i++) {
            found = (strcmp(strs2[j], strs1[i]) == 0);
        }
        if (!found) {
            err = strings_array_add(&S, &n, strs2[j]);
        }
    }

    if (err) {
        /* don't leak the partially built array */
        strings_array_free(S, n);
    } else {
        *extra = S;
        *n_extra = n;
    }

    return err;
}
2320
2321 /**
2322 * strings_array_reverse:
2323 * @strs: array of allocated strings.
2324 * @nstrs: number of strings in array.
2325 *
 * Returns: an array of @nstrs strings in which the
2327 * strings are those in @strs, in reverse order -- or
2328 * NULL on failure.
2329 */
2330
char **strings_array_reverse (char **strs, int nstrs)
{
    char **S = strings_array_new(nstrs);
    int i;

    if (S == NULL) {
        return NULL;
    }

    for (i=0; i<nstrs; i++) {
        S[i] = gretl_strdup(strs[nstrs-i-1]);
        if (S[i] == NULL) {
            /* on failure free the partial copy and return NULL,
               rather than a pointer to freed storage */
            strings_array_free(S, nstrs);
            return NULL;
        }
    }

    return S;
}
2350
2351 /**
2352 * strings_array_free:
2353 * @strs: array of allocated strings.
2354 * @nstrs: number of strings in array.
2355 *
2356 * Frees each allocated string in @strs, then frees @strs itself.
2357 * Checks that @strs is not NULL before proceeding.
2358 */
2359
void strings_array_free (char **strs, int nstrs)
{
    char **member;

    if (strs == NULL) {
        return;
    }

    /* release the members first, then the array that held them */
    for (member = strs; member - strs < nstrs; member++) {
        free(*member);
    }

    free(strs);
}
2371
2372 /**
2373 * get_obs_string:
2374 * @obs: char array big enough to hold the observation (#OBSLEN).
2375 * @t: zero-based observation number.
2376 * @dset: pointer to dataset information.
2377 *
2378 * Returns: the observation string corresponding to @t.
2379 */
2380
2381 char *get_obs_string (char *obs, int t, const DATASET *dset)
2382 {
2383 if (dataset_has_markers(dset)) {
2384 strcpy(obs, dset->S[t]);
2385 } else {
2386 ntolabel(obs, t, dset);
2387 }
2388
2389 return obs;
2390 }
2391
2392 /**
2393 * obs_str_to_double:
2394 * @obs: string representation of observation number.
2395 *
2396 * Returns: the floating-point counterpart of @obs,
2397 * or #NADBL on invalid input.
2398 */
2399
2400 double obs_str_to_double (const char *obs)
2401 {
2402 char *p, *test, tmp[OBSLEN];
2403 double ret;
2404
2405 strcpy(tmp, obs);
2406 p = tmp;
2407
2408 while (*p) {
2409 if (*p == ':' || *p == ',') {
2410 *p = '.';
2411 }
2412 p++;
2413 }
2414
2415 errno = 0;
2416
2417 gretl_push_c_numeric_locale();
2418 ret = strtod(tmp, &test);
2419 gretl_pop_c_numeric_locale();
2420
2421 if (*test != '\0' || errno == ERANGE) {
2422 ret = NADBL;
2423 }
2424
2425 return ret;
2426 }
2427
2428 /**
2429 * colonize_obs:
2430 * @obs: string representation of observation number.
2431 *
 * Converts any decimal point or comma in @obs to a colon.
2433 *
2434 * Returns: the (possibly) modified obs string.
2435 */
2436
char *colonize_obs (char *obs)
{
    char *hit = obs;

    /* jump to each '.' or ',' in turn and overwrite it */
    while ((hit = strpbrk(hit, ".,")) != NULL) {
        *hit++ = ':';
    }

    return obs;
}
2450
2451 /**
 * modify_date_for_csv:
 * @s: observation string (date).
 * @pd: data frequency.
 *
 * Modifies the observation string @s to produce a form suitable
 * for a CSV file. This applies only to time series data. The
 * string @s should be obtained by calling ntolabel().
2460 */
2461
void modify_date_for_csv (char *s, int pd)
{
    /* the colon becomes 'Q' when @pd is 4, otherwise 'M' */
    char marker = (pd == 4) ? 'Q' : 'M';

    gretl_charsub(s, ':', marker);
}
2470
2471 /**
2472 * print_time:
2473 * @s: string into which to print: must be at least 48 bytes.
2474 *
2475 * Returns: @s, which will contain a string representation of the
2476 * current date and time, in the format YYYY-mm-dd H:M.
2477 */
2478
char *print_time (char *s)
{
    time_t now = time(NULL);
    struct tm *local = localtime(&now);

    /* localtime() can return NULL, and strftime() returns 0 (leaving
       the buffer contents unspecified) if the result does not fit:
       in either case hand back an empty string */
    if (local == NULL ||
        strftime(s, 47, "%Y-%m-%d %H:%M", local) == 0) {
        *s = '\0';
    }

    return s;
}
2489
2490 /**
2491 * gretl_xml_validate:
2492 * @s: string to be tested.
2493 *
2494 * Returns: 1 if @s is acceptable for insertion into an XML file
2495 * as is, 0 if it contains special characters that need to be
2496 * escaped. See also gretl_xml_encode().
2497 */
2498
int gretl_xml_validate (const char *s)
{
    /* acceptable as is only if none of & < > " occurs */
    return strpbrk(s, "&<>\"") == NULL;
}
2510
2511 /**
2512 * gretl_xml_encode:
2513 * @str: NUL-terminated source string.
2514 *
2515 * Returns: an allocated re-write of @str, with characters that are
2516 * special in XML encoded as character entities. See also
2517 * gretl_xml_validate().
2518 */
2519
char *gretl_xml_encode (const char *str)
{
    const char *s;
    char *targ, *p;
    size_t len = strlen(str) + 1;

    /* first pass: add the extra bytes each entity needs beyond
       the single character it replaces */
    for (s = str; *s; s++) {
        if (*s == '&') {
            len += 4; /* &amp; */
        } else if (*s == '<' || *s == '>') {
            len += 3; /* &lt; or &gt; */
        } else if (*s == '"') {
            len += 5; /* &quot; */
        }
    }

    targ = malloc(len);
    if (targ == NULL) {
        gretl_errmsg_set(_("out of memory in XML encoding"));
        return NULL;
    }

    /* second pass: write the entities, advancing by the length
       of each one */
    p = targ;
    for (s = str; *s; s++) {
        if (*s == '&') {
            strcpy(p, "&amp;");
            p += 5;
        } else if (*s == '<') {
            strcpy(p, "&lt;");
            p += 4;
        } else if (*s == '>') {
            strcpy(p, "&gt;");
            p += 4;
        } else if (*s == '"') {
            strcpy(p, "&quot;");
            p += 6;
        } else {
            *p++ = *s;
        }
    }

    *p = '\0';

#ifdef XML_DEBUG
    fprintf(stderr, "done gretl_xml_encode: targ='%s'\n", targ);
#endif

    return targ;
}
2570
2571 /**
2572 * gretl_xml_encode_to_buf:
2573 * @targ: target buffer.
2574 * @src: NUL-terminated source string.
2575 * @n: size of @targ in bytes.
2576 *
2577 * Writes into @targ a version of @src in which characters that are
2578 * special in XML are encoded as character entities. See also
2579 * gretl_xml_encode() for the case where the encoding of @src is
2580 * of unknown size at compile time.
2581 *
2582 * Returns: 0 on success or 1 on error. An error occurs if (a) the
2583 * encoded version of @src is longer than @n bytes (allowing for NUL
2584 * termination), or (b) @src does not validate as UTF-8. On error
2585 * the conversion is not done.
2586 */
2587
int gretl_xml_encode_to_buf (char *targ, const char *src, int n)
{
    const char *s;
    int len = strlen(src) + 1;

    if (!g_utf8_validate(src, -1, NULL)) {
        fprintf(stderr, "gretl_xml_encode_to_buf: source not UTF-8\n");
        return 1;
    }

    /* work out the encoded size, including the terminating NUL */
    for (s = src; *s; s++) {
        if (*s == '&') {
            len += 4; /* &amp; */
        } else if (*s == '<' || *s == '>') {
            len += 3; /* &lt; or &gt; */
        } else if (*s == '"') {
            len += 5; /* &quot; */
        }
    }

    *targ = '\0';

    if (len > n) {
        fprintf(stderr, "gretl_xml_encode_to_buf: buffer too small\n");
        return 1;
    }

    /* write the entities, advancing by the length of each one */
    for (s = src; *s; s++) {
        if (*s == '&') {
            strcpy(targ, "&amp;");
            targ += 5;
        } else if (*s == '<') {
            strcpy(targ, "&lt;");
            targ += 4;
        } else if (*s == '>') {
            strcpy(targ, "&gt;");
            targ += 4;
        } else if (*s == '"') {
            strcpy(targ, "&quot;");
            targ += 6;
        } else {
            *targ++ = *s;
        }
    }

    *targ = '\0';

    return 0;
}
2638
/* convert the two hexadecimal digits at @s into the byte they encode */

static char x2c (char *s)
{
    int hi = (s[0] >= 'A') ? (s[0] & 0xdf) - 'A' + 10 : s[0] - '0';
    int lo = (s[1] >= 'A') ? (s[1] & 0xdf) - 'A' + 10 : s[1] - '0';

    return (char) (hi * 16 + lo);
}

/**
 * unescape_url:
 * @url: string representing a URL.
 *
 * Decodes, in place, each percent-escape in @url (a '%' followed
 * by two hexadecimal digits) to the byte it represents. A '%'
 * that is not followed by two hexadecimal digits is copied
 * unchanged.
 */

void unescape_url (char *url)
{
    int x, y;

    for (x=0, y=0; url[y]; ++x, ++y) {
        /* only decode a complete escape: the short-circuit tests
           ensure we never look beyond the terminating NUL */
        if (url[y] == '%' &&
            isxdigit((unsigned char) url[y+1]) &&
            isxdigit((unsigned char) url[y+2])) {
            url[x] = x2c(&url[y+1]);
            y += 2;
        } else {
            url[x] = url[y];
        }
    }
    url[x] = '\0';
}
2667
2668 /**
2669 * make_varname_unique:
2670 * @vname: tentative name for variable.
2671 * @v: the ID number for the new variable.
2672 * @dset: dataset information.
2673 *
2674 * Given a tentative name for a new variable, check that it
2675 * is not a duplicate of an existing varname. If it is,
2676 * modify the new name so that it becomes unique. The ID
2677 * number @v is required so that, if the variable has already
2678 * been added to the dataset, its name does not appear to
2679 * conflict with itself! If the name to be tested is not
2680 * associated with an existing variable, pass 0 for @v.
2681 *
2682 * Returns: the (possibly modified) variable name.
2683 */
2684
2685 char *make_varname_unique (char *vname, int v, DATASET *dset)
2686 {
2687 const char *sfx = "abcdefghijklmnopqrstuvwxzy"
2688 "ABCDEFGHIJKLMNOPQRSTUVWXZY";
2689 size_t n, nmax = VNAMELEN - 8;
2690 char tmp[5] = {0};
2691 int i, j, k, vi;
2692 int unique = 1;
2693
2694 /* first off: see if the series name is already unique! */
2695 vi = current_series_index(dset, vname);
2696 if ((v == 0 && vi < 0) || vi == v) {
2697 return vname;
2698 }
2699
2700 /* strategy: cut @vname down to a length that permits
2701 addition of a suffix (if necessary), then add a
2702 suffix composed of underscore and three (ASCII)
2703 letters. This allows for 52^3 = 140608 unique
2704 suffixes.
2705 */
2706
2707 n = strlen(vname);
2708 if (n > nmax) {
2709 n = nmax;
2710 }
2711
2712 tmp[0] = '_';
2713 unique = 0;
2714
2715 for (i=0; i<52 && !unique; i++) {
2716 tmp[1] = sfx[i];
2717 for (j=0; j<52 && !unique; j++) {
2718 tmp[2] = sfx[j];
2719 for (k=0; k<52 && !unique; k++) {
2720 tmp[3] = sfx[k];
2721 vname[n] = '\0';
2722 strcat(vname, tmp);
2723 unique = 1;
2724 for (vi = 1; vi < dset->v; vi++) {
2725 if (vi != v && !strcmp(vname, dset->varname[vi])) {
2726 unique = 0;
2727 break;
2728 }
2729 }
2730 }
2731 }
2732 }
2733
2734 if (!unique) {
2735 fprintf(stderr, "make_varname_unique: unresolved conflict!\n");
2736 }
2737
2738 return vname;
2739 }
2740
2741 int fix_varname_duplicates (DATASET *dset)
2742 {
2743 int msg_done, dups = 0;
2744 int i, j;
2745
2746 for (i=1; i<dset->v; i++) {
2747 msg_done = 0;
2748 for (j=i+1; j<dset->v; j++) {
2749 if (!strcmp(dset->varname[i], dset->varname[j])) {
2750 if (!msg_done) {
2751 fprintf(stderr, "'%s' duplicated variable name\n",
2752 dset->varname[i]);
2753 msg_done = 1;
2754 }
2755 dups = 1;
2756 make_varname_unique(dset->varname[j], j, dset);
2757 }
2758 }
2759 }
2760
2761 return dups;
2762 }
2763
2764 char *append_dir (char *fname, const char *dir)
2765 {
2766 size_t len;
2767
2768 if (dir == NULL) {
2769 return fname;
2770 }
2771
2772 len = strlen(fname);
2773
2774 if (fname[len - 1] == '/' || fname[len - 1] == '\\') {
2775 strcat(fname, dir);
2776 } else {
2777 strcat(fname, SLASHSTR);
2778 strcat(fname, dir);
2779 }
2780
2781 strcat(fname, SLASHSTR);
2782
2783 return fname;
2784 }
2785
2786 /**
2787 * path_last_element:
2788 * @path: path to work on.
2789 *
2790 * Returns: a pointer to the last element of @path, that is,
2791 * the element following the last path separator character, if any.
2792 * If @path does not contain a separator, @path itself is returned.
2793 * Note that the return value may be the empty string, if @path
2794 * ends with a separator.
2795 */
2796
const char *path_last_element (const char *path)
{
    const char *sep = strrslash(path);

    /* step past the separator if there is one, else use @path whole */
    return (sep != NULL) ? sep + 1 : path;
}
2809
2810 /**
2811 * trim_slash:
2812 * @s: string to work on.
2813 *
2814 * If @s ends with #SLASH, remove this character.
2815 *
2816 * Returns: the (possibly) modified string.
2817 */
2818
2819 char *trim_slash (char *s)
2820 {
2821 int n = strlen(s);
2822
2823 if (n > 0 && (s[n-1] == SLASH)) {
2824 s[n-1] = '\0';
2825 }
2826
2827 return s;
2828 }
2829
2830 /**
2831 * gretl_string_ends_with:
2832 * @s: string to examine.
2833 * @test: string to test for.
2834 *
2835 * Returns: 1 if @s ends with @test, else 0.
2836 */
2837
int gretl_string_ends_with (const char *s, const char *test)
{
    size_t ns = strlen(s);
    size_t nt = strlen(test);

    if (ns < nt) {
        /* @s is too short to end with @test */
        return 0;
    }

    /* compare the last nt bytes of @s against @test */
    return memcmp(s + (ns - nt), test, nt) == 0;
}
2852
2853 /**
2854 * get_column_widths:
2855 * @strs: array of @n strings.
2856 * @widths: array of @n default column widths.
2857 * @n: number of columns.
2858 *
2859 * If need be, increases the column widths in @widths to
 * accommodate the current translations of @strs.
2861 */
2862
void get_column_widths (const char **strs, int *widths, int n)
{
    int remaining = n;

    /* walk the two arrays in step, widening where the translated
       heading has more characters than the current width */
    while (remaining-- > 0) {
        int len = g_utf8_strlen(_(*strs), -1);

        if (*widths < len) {
            *widths = len;
        }
        strs++;
        widths++;
    }
}
2874
2875 /**
2876 * gretl_utf8_strncat:
2877 * @dest: destination string.
2878 * @src: source string.
2879 * @n: maximum number of bytes to append.
2880 *
2881 * Works just like strncat(), except that it ensures that we
2882 * don't end up with an incomplete UTF-8 character preceding
2883 * the terminating NUL byte.
2884 *
2885 * Returns: the destination string.
2886 */
2887
char *gretl_utf8_strncat (char *dest, const char *src, size_t n)
{
    const char *next = src;
    size_t keep = 0; /* bytes taken up by the characters that fit */

    /* step through @src one character at a time, remembering the
       byte offset of the last character boundary within @n */
    while (next != NULL && *next != '\0') {
        next = g_utf8_next_char(next);
        if (next != NULL) {
            size_t used = next - src;

            if (used > n) {
                break;
            }
            keep = used;
        }
    }

    return strncat(dest, src, keep);
}
2906
2907 /**
2908 * gretl_utf8_strncat_trim:
2909 * @dest: destination string.
2910 * @src: source string.
2911 * @n: maximum number of bytes to append.
2912 *
 * The same as gretl_utf8_strncat(), except that leading white space
 * in @src is skipped and trailing white space is trimmed from @dest.
2915 *
2916 * Returns: the destination string.
2917 */
2918
char *gretl_utf8_strncat_trim (char *dest, const char *src, size_t n)
{
    const char *p;
    size_t b, b0 = 0;
    size_t len;

    /* skip leading white space in the source */
    src += strspn(src, " \t\r\n");
    p = src;

    /* find the byte offset of the last character boundary
       that lies within @n bytes */
    while (p && *p) {
        p = g_utf8_next_char(p);
        if (p) {
            b = p - src;
            if (b > n) {
                break;
            }
            b0 = b;
        }
    }

    strncat(dest, src, b0);

    /* Trim trailing white space from the result. The cast to
       unsigned char matters here: @dest may hold UTF-8 bytes with
       the high bit set, which must not reach isspace() as
       negative values.
    */
    len = strlen(dest);
    while (len > 0 && isspace((unsigned char) dest[len-1])) {
        dest[--len] = '\0';
    }

    return dest;
}
2953
2954 /**
2955 * gretl_utf8_truncate:
2956 * @s: string to process.
2957 * @nmax: maximum number of characters to retain.
2958 *
2959 * Truncates @s to a maximum length of @nmax UTF-8 characters,
2960 * ensuring that we don't end up with an incomplete UTF-8
2961 * character preceding the terminating NUL byte.
2962 *
2963 * Returns: the (possibly truncated) string.
2964 */
2965
char *gretl_utf8_truncate (char *s, size_t nmax)
{
    char *p = s;
    size_t n = 0;

    if (s != NULL && nmax == 0) {
        /* retain no characters at all; the counting loop below
           cannot produce this result since it starts counting
           from 1 */
        *s = '\0';
        return s;
    }

    while (p && *p) {
        /* advance one whole character; if more text follows and
           the quota has been reached, cut the string here */
        p = g_utf8_next_char(p);
        if (p && *p) {
            if (++n == nmax) {
                *p = '\0';
                break;
            }
        }
    }

    return s;
}
2983
/**
 * gretl_utf8_truncate_b:
 * @s: string to process.
 * @bmax: maximum number of bytes to retain.
 *
 * Shortens @s in place so that it occupies no more than @bmax
 * bytes (not counting the terminating NUL). If the byte limit
 * would fall inside a multi-byte UTF-8 character, the cut is
 * moved back to the start of that character so that no
 * incomplete character is left at the end.
 *
 * Returns: the (possibly truncated) string.
 */

char *gretl_utf8_truncate_b (char *s, size_t bmax)
{
    char *cur = s;

    while (cur != NULL && *cur != '\0') {
        size_t used;

        cur = g_utf8_next_char(cur);
        used = cur - s;

        if (used < bmax) {
            /* still within the limit: keep going */
            continue;
        }
        if (used > bmax) {
            /* the last character straddles the limit: drop it */
            cur = g_utf8_prev_char(cur);
        }
        *cur = '\0';
        break;
    }

    return s;
}
3016
/**
 * gretl_utf8_replace_char:
 * @targ: the target for replacement.
 * @src: the UTF-8 character to insert (NUL terminated).
 * @pos: the position, in UTF-8 characters, at which to replace.
 *
 * Builds a copy of @targ in which the character at offset @pos
 * is replaced by the content of @src. @targ itself is not modified.
 *
 * Notes: @pos must be prechecked as within bounds, and
 * @src must be prechecked for containing a single character.
 *
 * Returns: newly allocated resulting string, or NULL if memory
 * allocation fails.
 */

char *gretl_utf8_replace_char (char *targ, char *src, int pos)
{
    /* s: start of the character to be replaced; p: start of the
       character that follows it */
    char *s = g_utf8_offset_to_pointer(targ, pos);
    char *p = g_utf8_find_next_char(s, NULL);
    size_t head = s - targ;
    size_t slen = strlen(src);
    size_t tail = strlen(p);
    char *ret;

    ret = malloc(head + slen + tail + 1);

    if (ret != NULL) {
        memcpy(ret, targ, head);
        memcpy(ret + head, src, slen);
        /* tail + 1 brings the terminating NUL along */
        memcpy(ret + head + slen, p, tail + 1);
    }

    return ret;
}
3044
3045 /**
3046 * gretl_scan_varname:
3047 * @src: source string.
3048 * @targ: target string.
3049 *
3050 * Performs sscanf() on @src, using a conversion specifier
3051 * which allows for writing up to VNAMELEN-1 bytes into
3052 * @targ (stopping at white space); @targ therefore be at
3053 * least VNAMELEN bytes long. No checking is done for the
3054 * validity of the scanned string as a gretl identifier.
3055 *
3056 * Returns: the return value from sscanf().
3057 */
3058
3059 int gretl_scan_varname (const char *src, char *targ)
3060 {
3061 char fmt[8];
3062
3063 sprintf(fmt, "%%%ds", VNAMELEN-1);
3064 return sscanf(src, fmt, targ);
3065 }
3066
3067 /**
3068 * gretl_normalize_varname:
3069 * @targ: target string.
3070 * @src: source string.
3071 * @underscore: flag to replace all illegal characters
3072 * with underscore.
3073 * @seq: sequence number in array of names, if applicable.
3074 *
3075 * Writes a vaid gretl identifier to @targ, which must be
3076 * at least #VNAMELEN bytes in length, taking @src as basis
3077 * and replacing any illegal characters as described in the
3078 * documentation for the userland fixname function.
3079 *
3080 * Returns: 1 if any changes were required, 0 if not.
3081 */
3082
3083 int gretl_normalize_varname (char *targ, const char *src,
3084 int underscore, int seq)
3085 {
3086 const char *letters = "abcdefghijklmnopqrstuvwxyz"
3087 "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
3088 int n, ret = 0, i = 0;
3089
3090 /* skip any leading non-letters */
3091 n = strcspn(src, letters);
3092 if (n > 0) {
3093 src += n;
3094 ret = 1;
3095 }
3096
3097 while (*src && i < VNAMELEN - 1) {
3098 if (strspn(src, letters) > 0 || isdigit(*src) || *src == '_') {
3099 /* transcribe valid characters */
3100 targ[i++] = *src;
3101 } else {
3102 if (*src == ' ' || underscore) {
3103 /* convert space to underscore */
3104 if (i > 0 && targ[i-1] == '_') {
3105 ; /* skip */
3106 } else {
3107 targ[i++] = '_';
3108 }
3109 }
3110 ret = 1;
3111 }
3112 src++;
3113 }
3114
3115 if (i > 0) {
3116 targ[i] = '\0';
3117 } else {
3118 if (seq <= 0) {
3119 strcpy(targ, "col[n]");
3120 } else {
3121 sprintf(targ, "col%d", seq);
3122 }
3123 ret = 1;
3124 }
3125
3126 return ret;
3127 }
3128
3129 /**
3130 * gretl_regexp_replace:
3131 * @orig: the original string.
3132 * @match: the pattern to match.
3133 * @repl: the replacement expression for @match.
3134 * @err: location to receive error code.
3135 *
3136 * Builds a string based on @orig but in which all
3137 * occurrences of @match (which is interpreted as a
3138 * regular expression of the Perl type) are replaced
3139 * by means of @repl (also interpreted as a regular
3140 * expression).
3141 *
3142 * Returns: newly allocated string or NULL on failure.
3143 */
3144
3145 char *gretl_regexp_replace (const char *orig,
3146 const char *match,
3147 const char *repl,
3148 int *err)
3149 {
3150 GRegex *regex;
3151 GError *error = NULL;
3152 char *mod = NULL;
3153
3154 regex = g_regex_new(match, 0, 0, &error);
3155
3156 if (error == NULL) {
3157 mod = g_regex_replace(regex, orig, -1, 0, repl, 0, &error);
3158 }
3159
3160 if (error != NULL) {
3161 *err = 1;
3162 gretl_errmsg_set(error->message);
3163 g_error_free(error);
3164 }
3165
3166 if (regex != NULL) {
3167 g_regex_unref(regex);
3168 }
3169
3170 return mod;
3171 }
3172
3173 /**
3174 * gretl_literal_replace:
3175 * @orig: the original string.
3176 * @match: the substring to match.
3177 * @repl: the replacement string for @match.
3178 * @err: location to receive error code.
3179 *
3180 * Builds a string based on @orig but in which all
3181 * occurrences of @match (which is interpreted as a
3182 * straight string literal) are replaced by @repl (also
3183 * a straight string literal).
3184 *
3185 * Returns: newly allocated string or NULL on failure.
3186 */
3187
3188 char *gretl_literal_replace (const char *orig,
3189 const char *match,
3190 const char *repl,
3191 int *err)
3192 {
3193 char *mod = NULL;
3194 const char *q, *r;
3195 int mlen = strlen(match);
3196 int nrep = 0;
3197
3198 if (mlen > 0) {
3199 /* count the occurrences of @match */
3200 q = orig;
3201 while ((r = strstr(q, match)) != NULL) {
3202 nrep++;
3203 q = r + mlen;
3204 }
3205 }
3206
3207 if (nrep == 0) {
3208 /* no replacement needed */
3209 mod = gretl_strdup(orig);
3210 } else {
3211 int ldiff = nrep * (strlen(repl) - mlen);
3212
3213 mod = malloc(strlen(orig) + ldiff + 1);
3214 if (mod != NULL) {
3215 q = orig;
3216 *mod = '\0';
3217 while ((r = strstr(q, match)) != NULL) {
3218 strncat(mod, q, r - q);
3219 strcat(mod, repl);
3220 q = r + mlen;
3221 }
3222 if (*q) {
3223 strcat(mod, q);
3224 }
3225 }
3226 }
3227
3228 if (mod == NULL) {
3229 *err = E_ALLOC;
3230 }
3231
3232 return mod;
3233 }
3234
3235 /**
3236 * gretl_substring:
3237 * @str: the string to operate on.
3238 * @first: 1-based index of initial character.
3239 * @last: 1-based index of final character, or -1 to go to the end.
3240 * @err: location to receive error code.
3241 *
3242 * Returns: a substring of @str, from @first to @last.
3243 */
3244
3245 char *gretl_substring (const char *str, int first, int last, int *err)
3246 {
3247 int len, ini, fin, sublen;
3248 char *ret;
3249
3250 len = g_utf8_strlen(str, -1);
3251 if (last == -1) {
3252 last = len;
3253 }
3254
3255 if (first <= 0 || last <= 0) {
3256 gretl_errmsg_sprintf("Index value %d is out of bounds",
3257 first <= 0 ? first : last);
3258 *err = E_DATA;
3259 }
3260
3261 ini = (first < 1) ? 1 : ((first > len) ? len : first);
3262 fin = (last < 1) ? 1 : ((last > len) ? len : last);
3263 sublen = (fin >= ini) ? fin - ini + 1 : 0;
3264
3265 if (sublen == 0) {
3266 ret = calloc(1, 1);
3267 } else {
3268 const char *s1;
3269 int i;
3270
3271 for (i=1; i<ini; i++) {
3272 str = g_utf8_next_char(str);
3273 }
3274 s1 = str;
3275 for (i=ini; i<=last; i++) {
3276 str = g_utf8_next_char(str);
3277 }
3278 len = str - s1;
3279 ret = calloc(len + 1, 1);
3280 if (ret != NULL) {
3281 *ret = '\0';
3282 gretl_utf8_strncat(ret, s1, len);
3283 }
3284 }
3285
3286 if (ret == NULL) {
3287 *err = E_ALLOC;
3288 }
3289
3290 return ret;
3291 }
3292