1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include "libgretl.h"
21 #include "libset.h"
22 #include "gretl_func.h"
23 #include "uservar.h"
24 #include "gretl_array.h"
25 #include "gretl_string_table.h"
26 #ifdef USE_CURL
27 # include "gretl_www.h"
28 #endif
29 
30 #include <errno.h>
31 #include <glib.h>
32 
33 enum {
34     ST_QUOTED  = 1 << 0,
35     ST_ALLINTS = 1 << 1,
36     ST_ALLDBLS = 1 << 2
37 };
38 
39 struct series_table_ {
40     int n_strs;       /* number of strings in table */
41     char **strs;      /* saved strings */
42     GHashTable *ht;   /* hash table for quick lookup */
43     int flags;        /* status flags (above) */
44 };
45 
46 struct gretl_string_table_ {
47     int *cols_list;       /* list of included columns */
48     series_table **cols;  /* per-column tables (see above) */
49     char *extra;          /* extra information, if any */
50 };
51 
52 #define st_quoted(t) (t->flags & ST_QUOTED)
53 #define all_ints(t)  (t->flags & ST_ALLINTS)
54 #define all_dbls(t)  (t->flags & ST_ALLDBLS)
55 #define all_num(t)   (t->flags & (ST_ALLINTS | ST_ALLDBLS))
56 
series_table_alloc(void)57 static series_table *series_table_alloc (void)
58 {
59     series_table *st = malloc(sizeof *st);
60 
61     if (st != NULL) {
62 	st->strs = NULL;
63 	st->n_strs = 0;
64 	st->ht = g_hash_table_new(g_str_hash, g_str_equal);
65 	st->flags = 0;
66     }
67 
68     return st;
69 }
70 
gretl_string_table_alloc(void)71 static gretl_string_table *gretl_string_table_alloc (void)
72 {
73     gretl_string_table *gst = malloc(sizeof *gst);
74 
75     if (gst != NULL) {
76 	gst->cols_list = NULL;
77 	gst->cols = NULL;
78 	gst->extra = NULL;
79     }
80 
81     return gst;
82 }
83 
84 /**
85  * gretl_string_table_new:
86  * @list: list of series IDs whose values are to be
87  * given a string representation, or NULL.
88  *
89  * These values in @list should correspond to the 0-based indices
90  * of the series in question within the dataset.  For example,
91  * if strings are to be recorded for variables 2, 5 and 10 the
92  * @list argument would be {3, 2, 5, 10}. If NULL is passed for
93  * @list the return value is an initialized, empty string table.
94  *
95  * Returns: pointer to a newly allocated string table or NULL
96  * on failure.
97  */
98 
gretl_string_table_new(const int * list)99 gretl_string_table *gretl_string_table_new (const int *list)
100 {
101     gretl_string_table *gst;
102     int ncols = 0;
103     int err = 0;
104 
105     gst = gretl_string_table_alloc();
106     if (gst == NULL) {
107 	return NULL;
108     }
109 
110     if (list != NULL && list[0] > 0) {
111 	gst->cols_list = gretl_list_copy(list);
112 	if (gst->cols_list == NULL) {
113 	    err = E_ALLOC;
114 	} else {
115 	    ncols = list[0];
116 	}
117     }
118 
119     if (ncols > 0) {
120 	gst->cols = malloc(ncols * sizeof *gst->cols);
121 	if (gst->cols == NULL) {
122 	    err = E_ALLOC;
123 	} else {
124 	    int i, j;
125 
126 	    for (i=0; i<ncols && !err; i++) {
127 		gst->cols[i] = series_table_alloc();
128 		if (gst->cols[i] == NULL) {
129 		    err = E_ALLOC;
130 		    for (j=0; j<i; j++) {
131 			free(gst->cols[j]);
132 		    }
133 		    free(gst->cols);
134 		}
135 	    }
136 	}
137     }
138 
139     if (err) {
140 	free(gst->cols_list);
141 	free(gst);
142 	gst = NULL;
143     }
144 
145     return gst;
146 }
147 
series_table_get_index(const series_table * st,const char * s)148 static int series_table_get_index (const series_table *st,
149 				   const char *s)
150 {
151     gpointer p = g_hash_table_lookup(st->ht, s);
152     int ret = 0;
153 
154     if (p != NULL) {
155 	ret = GPOINTER_TO_INT(p);
156     }
157 
158     return ret;
159 }
160 
161 /**
162  * series_table_get_value:
163  * @st: a gretl series table.
164  * @s: the string to look up.
165  *
166  * Returns: the numerical value associated with @s in the
167  * given series table, or #NADBL in case there is no match.
168  */
169 
series_table_get_value(series_table * st,const char * s)170 double series_table_get_value (series_table *st, const char *s)
171 {
172     int k = series_table_get_index(st, s);
173 
174     return (k > 0)? (double) k : NADBL;
175 }
176 
177 /**
178  * series_table_get_string:
179  * @st: a gretl series table.
180  * @val: the numerical value to look up.
181  *
182  * Returns: the string associated with @val in the
183  * given series table, or NULL in case there is no match.
184  */
185 
series_table_get_string(series_table * st,double val)186 const char *series_table_get_string (series_table *st, double val)
187 {
188     const char *ret = NULL;
189 
190     if (!na(val)) {
191 	int k = (int) lrint(val);
192 
193 	if (k > 0 && k <= st->n_strs) {
194 	    ret = st->strs[k-1];
195 	}
196     }
197 
198     return ret;
199 }
200 
201 /**
202  * series_table_map:
203  * @st_from: gretl series table.
204  * @st_to: gretl series table.
205  *
206  * Constructs a mapping from the integer codes in @st_from
207  * to those in @st_to. For example, if the string "foo"
208  * has code 3 in @st_from and code 12 in @st_to, then
209  * element 3 in the mapping array will have value 12.
210  * For any strings in @st_from that are not matched
211  * in @st_to, the associated element of the map is set
212  * to -1.
213  *
214  * Element 0 of the map holds the number of following
215  * elements, which is the same as the number of strings in
216  * @st_from.
217  *
218  * Returns: allocated array of int or NULL in case of failure.
219  */
220 
series_table_map(series_table * st_from,series_table * st_to)221 int *series_table_map (series_table *st_from, series_table *st_to)
222 {
223     int *map = NULL;
224     int n = st_from->n_strs;
225 
226     map = gretl_list_new(n);
227 
228     if (map != NULL) {
229 	const char *s1;
230 	int i, i2;
231 
232 	for (i=0; i<n; i++) {
233 	    s1 = st_from->strs[i];
234 	    i2 = series_table_get_index(st_to, s1);
235 	    map[i+1] = i2 == 0 ? -1 : i2;
236 	}
237     }
238 
239     return map;
240 }
241 
242 /**
243  * series_table_get_strings:
244  * @st: a gretl series table.
245  * @n_strs: location to receive the number of strings.
246  *
247  * Returns: the array of strings associated with @st. These
248  * should not be modified in any way.
249  */
250 
series_table_get_strings(series_table * st,int * n_strs)251 char **series_table_get_strings (series_table *st, int *n_strs)
252 {
253     if (st != NULL) {
254 	*n_strs = st->n_strs;
255 	return st->strs;
256     } else {
257 	return NULL;
258     }
259 }
260 
series_table_get_n_strings(series_table * st)261 int series_table_get_n_strings (series_table *st)
262 {
263     if (st != NULL) {
264 	return st->n_strs;
265     } else {
266 	return 0;
267     }
268 }
269 
get_unquoted(const char * s)270 static char *get_unquoted (const char *s)
271 {
272     char *tmp = NULL;
273     int n = strlen(s);
274 
275     if (s[n-1] == s[0]) {
276 	tmp = gretl_strndup(s+1, n-2);
277     }
278 
279     return tmp;
280 }
281 
282 /**
283  * series_table_add_string:
284  * @st: a gretl series table.
285  * @s: new string to add.
286  *
287  * Returns: the index of the new string within the table, or
288  * -1 on failure.
289  */
290 
series_table_add_string(series_table * st,const char * s)291 int series_table_add_string (series_table *st, const char *s)
292 {
293     char *tmp = NULL;
294     int n, err;
295 
296     if (*s == '"' || *s == '\'') {
297 	tmp = get_unquoted(s);
298     }
299 
300     if (tmp != NULL) {
301 	st->flags |= ST_QUOTED;
302 	err = strings_array_add(&st->strs, &st->n_strs, tmp);
303 	free(tmp);
304     } else {
305 	err = strings_array_add(&st->strs, &st->n_strs, s);
306     }
307 
308     if (err) {
309 	n = -1;
310     } else {
311 	n = st->n_strs;
312 	g_hash_table_insert(st->ht, (gpointer) st->strs[n-1],
313 			    GINT_TO_POINTER(n));
314     }
315 
316     return n;
317 }
318 
series_table_new(char ** strs,int n_strs,int * err)319 series_table *series_table_new (char **strs, int n_strs, int *err)
320 {
321     series_table *st = series_table_alloc();
322     int i;
323 
324     if (st == NULL) {
325 	*err = E_ALLOC;
326     } else {
327 	st->n_strs = n_strs;
328 	st->strs = strs;
329 	for (i=0; i<n_strs; i++) {
330 	    if (st->strs[i] == NULL) {
331 		fprintf(stderr, "series_table_new: str %d is NULL\n", i);
332 		*err = E_DATA;
333 	    } else {
334 		g_hash_table_insert(st->ht, (gpointer) st->strs[i],
335 				    GINT_TO_POINTER(i+1));
336 	    }
337 	}
338     }
339 
340     return st;
341 }
342 
series_table_copy(series_table * st)343 series_table *series_table_copy (series_table *st)
344 {
345     series_table *ret = NULL;
346 
347     if (st != NULL) {
348 	ret = series_table_alloc();
349     }
350 
351     if (ret != NULL) {
352 	char **S = strings_array_dup(st->strs, st->n_strs);
353 	int i;
354 
355 	if (S == NULL) {
356 	    series_table_destroy(ret);
357 	    ret = NULL;
358 	} else {
359 	    ret->n_strs = st->n_strs;
360 	    ret->strs = S;
361 	    for (i=0; i<ret->n_strs; i++) {
362 		g_hash_table_insert(ret->ht, (gpointer) ret->strs[i],
363 				    GINT_TO_POINTER(i+1));
364 	    }
365 	}
366     }
367 
368     return ret;
369 }
370 
371 static series_table *
gretl_string_table_add_column(gretl_string_table * gst,int colnum)372 gretl_string_table_add_column (gretl_string_table *gst, int colnum)
373 {
374     series_table **cols;
375     int *newlist;
376     int n, err = 0;
377 
378     newlist = gretl_list_append_term(&gst->cols_list, colnum);
379     if (newlist == NULL) {
380 	return NULL;
381     }
382 
383     n = gst->cols_list[0];
384 
385     cols = realloc(gst->cols, n * sizeof *cols);
386     if (cols == NULL) {
387 	err = E_ALLOC;
388     } else {
389 	gst->cols = cols;
390 	cols[n-1] = series_table_alloc();
391 	if (cols[n-1] == NULL) {
392 	    err = E_ALLOC;
393 	}
394     }
395 
396     if (err) {
397 	gst->cols_list[0] -= 1;
398 	return NULL;
399     } else {
400 	return cols[n-1];
401     }
402 }
403 
404 /**
405  * gretl_string_table_index:
406  * @gst: a gretl string table.
407  * @s: the string to look up or add.
408  * @col: index of the column to be accessed or created.
409  * @addcol: non-zero to indicate that a column should be
410  * added if it's not already present.
411  * @prn: gretl printer (or %NULL).
412  *
413  * This function has two main uses: for lookup in the context of
414  * a completed string table, or for constructing such a
415  * table (with @addcol non-zero).  The returned index reflects
416  * any additions to the table that may be required (if column
417  * @col does not already exist, or if string @s is not already
418  * stored for column @col).
419  *
420  * Returns: the 1-based index of @s within the column of
421  * @st that has index @col, if available, otherwise 0.
422  */
423 
424 int
gretl_string_table_index(gretl_string_table * gst,const char * s,int col,int addcol,PRN * prn)425 gretl_string_table_index (gretl_string_table *gst, const char *s,
426 			  int col, int addcol, PRN *prn)
427 {
428     series_table *st = NULL;
429     char *tmp = NULL;
430     int i, idx = 0;
431 
432     if (gst == NULL) {
433 	return idx;
434     }
435 
436     if (gst->cols_list != NULL) {
437 	for (i=1; i<=gst->cols_list[0]; i++) {
438 	    if (gst->cols_list[i] == col) {
439 		st = gst->cols[i-1];
440 		break;
441 	    }
442 	}
443     }
444 
445     if (*s == '"') {
446 	tmp = get_unquoted(s);
447     }
448 
449     if (st != NULL) {
450 	/* there's a table for this column already */
451 	idx = series_table_get_index(st, tmp != NULL ? tmp : s);
452     } else if (addcol) {
453 	/* no table for this column yet: start one now */
454 	st = gretl_string_table_add_column(gst, col);
455 	if (st != NULL) {
456 	    pprintf(prn, _("variable %d: translating from strings to "
457 			   "code numbers\n"), col);
458 	}
459     }
460 
461     if (idx == 0 && st != NULL) {
462 	idx = series_table_add_string(st, s);
463     }
464 
465     free(tmp);
466 
467     return idx;
468 }
469 
470 /* Used in the context of deletion of "empty" variables from
471    an imported dataset: the index of a given "column" in
472    a string table is adjusted to match the new position of
473    the variable in question.
474 */
475 
gretl_string_table_reset_column_id(gretl_string_table * gst,int oldid,int newid)476 int gretl_string_table_reset_column_id (gretl_string_table *gst,
477 					int oldid, int newid)
478 {
479     if (gst != NULL) {
480 	int i;
481 
482 	for (i=1; i<=gst->cols_list[0]; i++) {
483 	    if (gst->cols_list[i] == oldid) {
484 		gst->cols_list[i] = newid;
485 		return 0;
486 	    }
487 	}
488     }
489 
490     return E_DATA;
491 }
492 
gretl_string_table_detach_col(gretl_string_table * gst,int col)493 series_table *gretl_string_table_detach_col (gretl_string_table *gst,
494 					     int col)
495 {
496     series_table *st = NULL;
497 
498     if (gst != NULL) {
499 	int pos = in_gretl_list(gst->cols_list, col);
500 	int i, n = gst->cols_list[0];
501 
502 	if (pos > 0) {
503 	    st = gst->cols[pos-1];
504 	    for (i=pos-1; i<n-1; i++) {
505 		gst->cols[i] = gst->cols[i+1];
506 	    }
507 	    gst->cols[n-1] = NULL;
508 	    gretl_list_delete_at_pos(gst->cols_list, pos);
509 	}
510     }
511 
512     return st;
513 }
514 
in_string_table(gretl_string_table * gst,int id)515 int in_string_table (gretl_string_table *gst, int id)
516 {
517     if (gst != NULL) {
518 	return in_gretl_list(gst->cols_list, id);
519     } else {
520 	return 0;
521     }
522 }
523 
string_table_copy_list(gretl_string_table * gst)524 int *string_table_copy_list (gretl_string_table *gst)
525 {
526     if (gst != NULL) {
527 	return gretl_list_copy(gst->cols_list);
528     } else {
529 	return NULL;
530     }
531 }
532 
string_table_replace_list(gretl_string_table * gst,int * newlist)533 int string_table_replace_list (gretl_string_table *gst,
534 			       int *newlist)
535 {
536     if (gst != NULL) {
537 	/* FIXME pruning? */
538 	free(gst->cols_list);
539 	gst->cols_list = newlist;
540     }
541 
542     return E_DATA;
543 }
544 
545 /**
546  * series_table_destroy:
547  * @st: series string table.
548  *
549  * Frees all resources associated with @st.
550  */
551 
series_table_destroy(series_table * st)552 void series_table_destroy (series_table *st)
553 {
554     if (st != NULL) {
555 	strings_array_free(st->strs, st->n_strs);
556 	if (st->ht != NULL) {
557 	    g_hash_table_destroy(st->ht);
558 	}
559 	free(st);
560     }
561 }
562 
563 /**
564  * gretl_string_table_destroy:
565  * @gst: gretl string table.
566  *
567  * Frees all resources associated with @gst.
568  */
569 
gretl_string_table_destroy(gretl_string_table * gst)570 void gretl_string_table_destroy (gretl_string_table *gst)
571 {
572     int i, ncols;
573 
574     if (gst == NULL) return;
575 
576     ncols = (gst->cols_list != NULL)? gst->cols_list[0] : 0;
577 
578     for (i=0; i<ncols; i++) {
579 	series_table_destroy(gst->cols[i]);
580     }
581     free(gst->cols);
582 
583     free(gst->cols_list);
584 
585     if (gst->extra != NULL) {
586 	free(gst->extra);
587     }
588 
589     free(gst);
590 }
591 
592 /* Given a series_table in which all the strings are just
593    representations of integers, write the integer values
594    into the series and destroy the table, while marking
595    the series as "coded". Or, if all the strings are valid
596    doubles, convert to numeric.
597 */
598 
series_commute_string_table(DATASET * dset,int i,series_table * st)599 static void series_commute_string_table (DATASET *dset, int i,
600 					 series_table *st)
601 {
602     if (dset != NULL && i > 0 && i < dset->v) {
603 	const char *s;
604 	double val;
605 	int t;
606 
607 	for (t=0; t<dset->n; t++) {
608 	    val = dset->Z[i][t];
609 	    if (!na(val)) {
610 		s = series_table_get_string(st, val);
611 		if (all_ints(st)) {
612 		    dset->Z[i][t] = (double) atoi(s);
613 		} else {
614 		    dset->Z[i][t] = atof(s);
615 		}
616 	    }
617 	}
618 	if (all_ints(st)) {
619 	    series_set_flag(dset, i, VAR_DISCRETE);
620 	    if (!gretl_isdummy(0, dset->n - 1, dset->Z[i])) {
621 		series_set_flag(dset, i, VAR_CODED);
622 	    }
623 	} else {
624 	    series_unset_flag(dset, i, VAR_DISCRETE);
625 	}
626 	series_table_destroy(st);
627     }
628 }
629 
630 /**
631  * gretl_string_table_print:
632  * @gst: gretl string table.
633  * @dset: dataset information (for names of variables).
634  * @fname: name of the datafile to which the table pertains.
635  * @prn: gretl printer (or %NULL).
636  *
637  * In basic usage, prints table @gst to string_table.txt in
638  * the user's working directory. However, if one or more of
639  * the series referenced by @gst are deemed to be integer codes
640  * or misclassified numeric data, their "series tables" are
641  * commuted into numeric form. If all series are handled in
642  * this way, the string_table is not printed.
643  *
644  * Returns: 0 on success, non-zero on error.
645  */
646 
gretl_string_table_print(gretl_string_table * gst,DATASET * dset,const char * fname,PRN * prn)647 int gretl_string_table_print (gretl_string_table *gst, DATASET *dset,
648 			      const char *fname, PRN *prn)
649 {
650     series_table *st;
651     const char *fshort;
652     char stname[MAXLEN];
653     FILE *fp = NULL;
654     int i, j, ncols = 0;
655     int n_strvars = 0;
656     int err = 0;
657 
658     if (gst == NULL) {
659 	return E_DATA;
660     }
661 
662     ncols = (gst->cols_list != NULL)? gst->cols_list[0] : 0;
663     n_strvars = ncols;
664 
665     /* first examine the string table for numeric codings */
666     for (i=0; i<ncols; i++) {
667 	st = gst->cols[i];
668 	if (st == NULL || all_num(st)) {
669 	    n_strvars--;
670 	}
671     }
672 
673     if (n_strvars > 0) {
674 	strcpy(stname, "string_table.txt");
675 	gretl_path_prepend(stname, gretl_workdir());
676 
677 	fp = gretl_fopen(stname, "w");
678 	if (fp == NULL) {
679 	    return E_FOPEN;
680 	}
681 
682 	fshort = strrslash(fname);
683 	if (fshort != NULL) {
684 	    fprintf(fp, "%s\n", fshort + 1);
685 	} else {
686 	    fprintf(fp, "%s\n", fname);
687 	}
688 
689 	fputc('\n', fp);
690 	fputs(_("One or more non-numeric variables were found.\n"
691 		"These variables have been given numeric codes as follows.\n\n"), fp);
692 	if (gst->extra != NULL) {
693 	    fputs(_("In addition, some mappings from numerical values to string\n"
694 		    "labels were found, and are printed below.\n\n"), fp);
695 	}
696     }
697 
698     for (i=0; i<ncols; i++) {
699 	int vi = gst->cols_list[i+1];
700 
701 	st = gst->cols[i];
702 	if (fp != NULL && st != NULL && !all_num(st)) {
703 	    if (i > 0) {
704 		fputc('\n', fp);
705 	    }
706 	    fprintf(fp, _("String code table for variable %d (%s):\n"),
707 		    vi, dset->varname[vi]);
708 	    for (j=0; j<st->n_strs; j++) {
709 		fprintf(fp, "%3d = '%s'\n", j+1, st->strs[j]);
710 	    }
711 	}
712 	if (dset->varinfo != NULL) {
713 	    if (all_num(st)) {
714 		pputs(prn, "commuting series table\n");
715 		series_commute_string_table(dset, vi, st);
716 	    } else {
717 		series_attach_string_table(dset, vi, st);
718 	    }
719 	    gst->cols[i] = NULL;
720 	}
721     }
722 
723     if (fp != NULL) {
724 	if (gst->extra != NULL) {
725 	    fputs(gst->extra, fp);
726 	}
727 	pprintf(prn, _("String code table written to\n %s\n"), stname);
728 	fclose(fp);
729 	set_string_table_written();
730     }
731 
732     return err;
733 }
734 
string_is_double(const char * s)735 static int string_is_double (const char *s)
736 {
737     char *test;
738 
739     errno = 0;
740     strtod(s, &test);
741     return errno == 0 && *test == '\0';
742 }
743 
744 /**
745  * gretl_string_table_validate:
746  * @gst: gretl string table.
747  * @opt: may include OPT_S to indicate that the string
748  * table was constructed from a spreadsheet file, in which
749  * case certain cells may be explicitly marked as holding
750  * strings, which information may be not be reliable.
751  *
752  * Checks that the "string values" in @gst are not in fact
753  * undigested quasi-numerical values. We run this on
754  * imported CSV data to ensure we don't produce
755  * misleading results. In the spreadsheet case we're on
756  * the lookout for purely numerical data wrongly identified
757  * as strings; we don't treat that as fatal but mark the
758  * series table(s) for conversion to numeric.
759  *
760  * Returns: 0 on success, non-zero on error.
761  */
762 
gretl_string_table_validate(gretl_string_table * gst,gretlopt opt)763 int gretl_string_table_validate (gretl_string_table *gst,
764 				 gretlopt opt)
765 {
766     const char *test = "0123456789.,";
767     int ssheet = (opt & OPT_S);
768     int i, ncols = 0;
769     int err = 0;
770 
771     if (gst != NULL && gst->cols_list != NULL) {
772 	ncols = gst->cols_list[0];
773     }
774 
775     for (i=0; i<ncols; i++) {
776 	series_table *st = gst->cols[i];
777 	const char *s;
778 	int nint = 0;
779 	int ndbl = 0;
780 	int j, myerr = E_DATA;
781 
782 	if (st_quoted(st)) {
783 	    myerr = 0;
784 	}
785 
786 	for (j=0; j<st->n_strs; j++) {
787 	    s = st->strs[j];
788 	    if (st_quoted(st)) {
789 		if (integer_string(s)) {
790 		    nint++;
791 		}
792 	    } else {
793 		/* not quoted */
794 		if (ssheet && (*s == '\0' || string_is_double(s))) {
795 		    /* could really be numeric? (2020-12-25) */
796 		    ndbl++;
797 		    continue;
798 		} else if (*s == '-' || *s == '+') {
799 		    s++;
800 		}
801 		if (strspn(s, test) < strlen(s)) {
802 		    /* not quasi-numeric */
803 		    myerr = 0;
804 		    break;
805 		}
806 	    }
807 	}
808 
809 	if (nint == st->n_strs) {
810 	    /* treat as integer codes */
811 	    st->flags |= ST_ALLINTS;
812 	} else if (ndbl == st->n_strs) {
813 	    /* all really numeric */
814 	    st->flags |= ST_ALLDBLS;
815 	    myerr = 0;
816 	}
817 
818 	if (myerr) {
819 	    err = myerr;
820 	    break;
821 	}
822     }
823 
824     return err;
825 }
826 
827 /**
828  * gretl_string_table_save:
829  * @gst: gretl string table.
830  * @dset: dataset information (for names of variables).
831  *
832  * Attaches the content of @gst to @dset.
833  *
834  * Returns: 0 on success, non-zero on error.
835  */
836 
gretl_string_table_save(gretl_string_table * gst,DATASET * dset)837 int gretl_string_table_save (gretl_string_table *gst, DATASET *dset)
838 {
839     series_table *st;
840     int i, vi, ncols = 0;
841 
842     if (gst == NULL || dset->varinfo == NULL) {
843 	return E_DATA;
844     }
845 
846     ncols = (gst->cols_list != NULL)? gst->cols_list[0] : 0;
847 
848     for (i=0; i<ncols; i++) {
849 	st = gst->cols[i];
850 	if (st != NULL) {
851 	    vi = gst->cols_list[i+1];
852 	    st = gst->cols[i];
853 	    series_attach_string_table(dset, vi, st);
854 	    gst->cols[i] = NULL;
855 	}
856     }
857 
858     return 0;
859 }
860 
861 /**
862  * gretl_string_table_add_extra:
863  * @gst: gretl string table.
864  * @prn: gretl printer.
865  *
866  * Steals the printing buffer from @prn and adds it to @gst.
867  * The buffer will be appended when @gst is printed via
868  * gretl_string_table_print().
869  */
870 
gretl_string_table_add_extra(gretl_string_table * gst,PRN * prn)871 void gretl_string_table_add_extra (gretl_string_table *gst, PRN *prn)
872 {
873     if (gst != NULL && prn != NULL) {
874 	gst->extra = gretl_print_steal_buffer(prn);
875     }
876 }
877 
878 /* apparatus for built-in strings */
879 
880 struct built_in_string_ {
881     char name[VNAMELEN];
882     gchar *s;
883 };
884 
885 typedef struct built_in_string_ built_in_string;
886 
887 static built_in_string built_ins[] = {
888     { "gretldir", NULL },
889     { "dotdir",   NULL },
890     { "workdir",  NULL },
891     { "gnuplot",  NULL },
892     { "x12a",     NULL },
893     { "x12adir",  NULL },
894     { "tramo",    NULL },
895     { "tramodir", NULL },
896     { "seats",    NULL },
897     { "pkgdir",   NULL },
898     { "lang",     NULL },
899     { "logfile",  NULL }
900 };
901 
builtin_strings_cleanup(void)902 void builtin_strings_cleanup (void)
903 {
904     int i, n = sizeof built_ins / sizeof built_ins[0];
905 
906     for (i=0; i<n; i++) {
907 	g_free(built_ins[i].s);
908     }
909 }
910 
911 /**
912  * gretl_insert_builtin_string:
913  * @name: the name of the string to be added or replaced.
914  * @s: the value for this string variable.
915  *
916  * Inserts value @s for string @name in gretl's table
917  * of built-in string variables.
918  */
919 
gretl_insert_builtin_string(const char * name,const char * s)920 void gretl_insert_builtin_string (const char *name, const char *s)
921 {
922     int i, n = sizeof built_ins / sizeof built_ins[0];
923     int m, gui = gretl_in_gui_mode();
924 
925     for (i=0; i<n; i++) {
926 	if (!strcmp(name, built_ins[i].name)) {
927 	    g_free(built_ins[i].s);
928 	    if (s == NULL) {
929 		built_ins[i].s = NULL;
930 	    } else if (gui && !g_utf8_validate(s, -1, NULL)) {
931 		/* handle non-ASCII Windows paths */
932 		gsize bytes;
933 		gchar *u;
934 
935 		u = g_locale_to_utf8(s, -1, NULL, &bytes, NULL);
936 		if (u != NULL) {
937 		    m = strlen(u);
938 		    if (u[m-1] == SLASH) {
939 			u[m-1] = '\0';
940 		    }
941 		}
942 		built_ins[i].s = u;
943 	    } else {
944 		m = strlen(s);
945 		if (s[m-1] == SLASH) {
946 		    /* drop trailing dir separator for paths */
947 		    built_ins[i].s = g_strndup(s, m - 1);
948 		} else {
949 		    built_ins[i].s = g_strdup(s);
950 		}
951 	    }
952 	    return;
953 	}
954     }
955 }
956 
get_built_in_string_by_name(const char * name)957 char *get_built_in_string_by_name (const char *name)
958 {
959     int i, n = sizeof built_ins / sizeof built_ins[0];
960 
961     for (i=0; i<n; i++) {
962 	if (!strcmp(name, built_ins[i].name)) {
963 	    char *s = built_ins[i].s;
964 
965 	    return s != NULL ? s : "";
966 	}
967     }
968 
969     return NULL;
970 }
971 
972 /* Try to recode the content of a local file or web resource
973    to UTF-8. Can be tricky since we don't know the original
974    encoding of the content.
975 */
976 
recode_content(gchar * orig,const char * codeset,int * err)977 static gchar *recode_content (gchar *orig, const char *codeset,
978 			      int *err)
979 {
980     const gchar *charset = NULL;
981     GError *gerr = NULL;
982     gsize wrote = 0;
983     gchar *tr;
984 
985     if (codeset != NULL) {
986 	/* the user specified the source encoding */
987 	tr = g_convert(orig, -1, "UTF-8", codeset,
988 		       NULL, &wrote, &gerr);
989     } else if (g_get_charset(&charset)) {
990 	/* we're in a UTF-8 locale, so we know that
991 	   g_locale_to_utf8 won't do the job; so guess
992 	   the content is iso-8859-something?
993 	*/
994 	tr = g_convert(orig, -1, "UTF-8", "ISO-8859-15",
995 		       NULL, &wrote, &gerr);
996     } else {
997 	/* try assuming the material is in the locale
998 	   encoding */
999 	tr = g_locale_to_utf8(orig, -1, NULL, &wrote, &gerr);
1000 	if (gerr != NULL) {
1001 	    /* failed: try iso-8859-15? */
1002 	    g_error_free(gerr);
1003 	    gerr = NULL;
1004 	    tr = g_convert(orig, -1, "UTF-8", "ISO-8859-15",
1005 			   NULL, &wrote, &gerr);
1006 	}
1007     }
1008 
1009     if (gerr != NULL) {
1010 	gretl_errmsg_set(gerr->message);
1011 	*err = E_DATA;
1012 	g_error_free(gerr);
1013     }
1014 
1015     g_free(orig);
1016 
1017     return tr;
1018 }
1019 
shell_grab(const char * arg,char ** sout)1020 static int shell_grab (const char *arg, char **sout)
1021 {
1022     int err = 0;
1023 
1024     if (arg == NULL || *arg == '\0') {
1025 	return E_PARSE;
1026     }
1027 
1028     if (!libset_get_bool(SHELL_OK)) {
1029 	gretl_errmsg_set(_("The shell command is not activated."));
1030 	return 1;
1031     }
1032 
1033     gretl_shell_grab(arg, sout);
1034 
1035     if (sout != NULL && *sout != NULL) {
1036 	char *content = *sout;
1037 
1038 	if (!g_utf8_validate(content, -1, NULL)) {
1039 	    content = recode_content(content, NULL, &err);
1040 	    *sout = content;
1041 	}
1042 
1043 	if (content != NULL) {
1044 	    /* trim trailing newline */
1045 	    int n = strlen(content);
1046 
1047 	    if (content[n-1] == '\n') {
1048 		content[n-1] = '\0';
1049 	    }
1050 	}
1051     }
1052 
1053     return err;
1054 }
1055 
gretl_backtick(const char * arg,int * err)1056 char *gretl_backtick (const char *arg, int *err)
1057 {
1058     char *val = NULL;
1059 
1060     *err = shell_grab(arg, &val);
1061 
1062     if (!*err && val == NULL) {
1063 	val = gretl_strdup("");
1064 	if (val == NULL) {
1065 	    *err = E_ALLOC;
1066 	}
1067     }
1068 
1069     return val;
1070 }
1071 
gretl_getenv(const char * key,int * defined,int * err)1072 char *gretl_getenv (const char *key, int *defined, int *err)
1073 {
1074     char *test = getenv(key);
1075     char *val = NULL;
1076 
1077     if (test == NULL) {
1078 	*defined = 0;
1079 	val = gretl_strdup("");
1080     } else {
1081 	*defined = 1;
1082 	val = gretl_strdup(test);
1083     }
1084 
1085     if (val == NULL) {
1086 	*err = E_ALLOC;
1087     }
1088 
1089     return val;
1090 }
1091 
retrieve_date_string(int t,const DATASET * dset,int * err)1092 char *retrieve_date_string (int t, const DATASET *dset, int *err)
1093 {
1094     char *ret = NULL;
1095 
1096     if (t <= 0 || t > dset->n) {
1097 	*err = E_DATA;
1098     } else if (dset->S != NULL) {
1099 	ret = gretl_strdup(dset->S[t-1]);
1100 	if (ret == NULL) {
1101 	    *err = E_ALLOC;
1102 	}
1103     } else {
1104 	char datestr[OBSLEN] = {0};
1105 
1106 	ntolabel(datestr, t - 1, dset);
1107 	ret = gretl_strdup(datestr);
1108 	if (ret == NULL) {
1109 	    *err = E_ALLOC;
1110 	}
1111     }
1112 
1113     return ret;
1114 }
1115 
1116 /* returns a gretl_array of strings on success */
1117 
retrieve_date_strings(const gretl_vector * v,const DATASET * dset,int * err)1118 gretl_array *retrieve_date_strings (const gretl_vector *v,
1119 				    const DATASET *dset,
1120 				    int *err)
1121 {
1122     gretl_array *ret = NULL;
1123     char *s = NULL;
1124     int i, t, n;
1125 
1126     n = gretl_vector_get_length(v);
1127     if (n == 0) {
1128 	*err = E_INVARG;
1129     } else {
1130 	ret = gretl_array_new(GRETL_TYPE_STRINGS, n, err);
1131     }
1132 
1133     for (i=0; i<n && !*err; i++) {
1134 	t = gretl_int_from_double(v->val[i], err);
1135 	if (!*err) {
1136 	    s = retrieve_date_string(t, dset, err);
1137 	}
1138 	if (!*err) {
1139 	    gretl_array_set_data(ret, i, s);
1140 	}
1141     }
1142 
1143     if (*err && ret != NULL) {
1144 	gretl_array_destroy(ret);
1145 	ret = NULL;
1146     }
1147 
1148     return ret;
1149 }
1150 
is_web_resource(const char * s)1151 static int is_web_resource (const char *s)
1152 {
1153     if (!strncmp(s, "http://", 7) ||
1154 	!strncmp(s, "https://", 8) ||
1155 	!strncmp(s, "ftp://", 6)) {
1156 	return 1;
1157     } else {
1158 	return 0;
1159     }
1160 }
1161 
gzipped_file_get_content(const char * fname,int * err)1162 static gchar *gzipped_file_get_content (const char *fname,
1163 					int *err)
1164 {
1165     gzFile fz = gretl_gzopen(fname, "rb");
1166     gchar *ret = NULL;
1167 
1168     if (fz == NULL) {
1169 	*err = E_FOPEN;
1170     } else {
1171 	size_t len = 0;
1172 	int chk;
1173 
1174 	while (gzgetc(fz) > 0) {
1175 	    len++;
1176 	}
1177 	if (len > 0) {
1178 	    gzrewind(fz);
1179 	    ret = g_try_malloc(len + 1);
1180 	    if (ret == NULL) {
1181 		*err = E_ALLOC;
1182 	    } else {
1183 		chk = gzread(fz, ret, len);
1184 		if (chk <= 0) {
1185 		    *err = E_DATA;
1186 		}
1187 		ret[len] = '\0';
1188 	    }
1189 	} else {
1190 	    ret = g_strdup("");
1191 	}
1192 	gzclose(fz);
1193     }
1194 
1195     return ret;
1196 }
1197 
regular_file_get_content(const char * fname,int * err)1198 static gchar *regular_file_get_content (const char *fname,
1199 					int *err)
1200 {
1201     GError *gerr = NULL;
1202     gchar *ret = NULL;
1203     size_t len = 0;
1204     int done = 0;
1205 
1206 #ifdef WIN32
1207     /* g_file_get_contents() requires a UTF-8 filename */
1208     if (!g_utf8_validate(fname, -1, NULL)) {
1209 	gchar *fconv;
1210 	gsize wrote = 0;
1211 
1212 	fconv = g_locale_to_utf8(fname, -1, NULL, &wrote, &gerr);
1213 	if (fconv != NULL) {
1214 	    g_file_get_contents(fconv, &ret, &len, &gerr);
1215 	    g_free(fconv);
1216 	}
1217 	done = 1;
1218     }
1219 #endif
1220     if (!done) {
1221 	g_file_get_contents(fname, &ret, &len, &gerr);
1222     }
1223 
1224     if (gerr != NULL) {
1225 	gretl_errmsg_set(gerr->message);
1226 	*err = E_FOPEN;
1227 	g_error_free(gerr);
1228     }
1229 
1230     return ret;
1231 }
1232 
retrieve_file_content(const char * fname,const char * codeset,int * err)1233 char *retrieve_file_content (const char *fname, const char *codeset,
1234 			     int *err)
1235 {
1236     char *ret = NULL;
1237     gchar *content = NULL;
1238     size_t len = 0;
1239     gssize sz;
1240 
1241     if (fname == NULL || *fname == '\0') {
1242 	*err = E_INVARG;
1243     } else if (is_web_resource(fname)) {
1244 #ifdef USE_CURL
1245 	content = retrieve_public_file_as_buffer(fname, &len, err);
1246 #else
1247 	gretl_errmsg_set(_("Internet access not supported"));
1248 	*err = E_DATA;
1249 #endif
1250     } else {
1251 	char fullname[FILENAME_MAX] = {0};
1252 
1253 	strncat(fullname, fname, FILENAME_MAX - 1);
1254 	gretl_addpath(fullname, 0);
1255 	if (is_gzipped(fullname)) {
1256 	    content = gzipped_file_get_content(fullname, err);
1257 	} else {
1258 	    content = regular_file_get_content(fullname, err);
1259 	}
1260     }
1261 
1262     sz = (len > 0)? len : -1;
1263     if (content != NULL && !g_utf8_validate(content, sz, NULL)) {
1264 	content = recode_content(content, codeset, err);
1265     }
1266 
1267     if (content != NULL) {
1268 	if (*err == 0) {
1269 	    ret = gretl_strdup(content);
1270 	}
1271 	g_free(content);
1272     }
1273 
1274     return ret;
1275 }
1276 
1277 /* inserting string into format portion of (s)printf command:
1278    double any backslashes to avoid breakage of Windows paths
1279 */
1280 
mod_strdup(const char * s)1281 static char *mod_strdup (const char *s)
1282 {
1283     char *ret = NULL;
1284     int i, n = strlen(s);
1285     int bs = 0;
1286 
1287     for (i=0; i<n; i++) {
1288 	if (s[i] == '\\' && (i == n - 1 || s[i+1] != '\\')) {
1289 	    bs++;
1290 	}
1291     }
1292 
1293     ret = malloc(n + 1 + bs);
1294     if (ret == NULL) {
1295 	return NULL;
1296     }
1297 
1298     if (bs == 0) {
1299 	strcpy(ret, s);
1300     } else {
1301 	int j = 0;
1302 
1303 	for (i=0; i<n; i++) {
1304 	    if (s[i] == '\\' && (i == n - 1 || s[i+1] != '\\')) {
1305 		ret[j++] = '\\';
1306 	    }
1307 	    ret[j++] = s[i];
1308 	}
1309 	ret[j] = '\0';
1310     }
1311 
1312     return ret;
1313 }
1314 
maybe_get_subst(char * name,int * n,int quoted,int * freeit)1315 static char *maybe_get_subst (char *name, int *n, int quoted,
1316 			      int *freeit)
1317 {
1318     char *s = NULL;
1319     int k = *n - 1;
1320     char *ret = NULL;
1321 
1322     while (k >= 0) {
1323 	s = (char *) get_string_by_name(name);
1324 	if (s != NULL) {
1325 	    *n = k + 1;
1326 	    ret = s;
1327 	    break;
1328 	}
1329 	name[k--] = '\0';
1330     }
1331 
1332     if (ret != NULL) {
1333 	if (quoted && strchr(ret, '\\')) {
1334 	    ret = mod_strdup(ret);
1335 	    *freeit = 1;
1336 	}
1337     }
1338 
1339     return ret;
1340 }
1341 
too_long(void)1342 static void too_long (void)
1343 {
1344     gretl_errmsg_sprintf(_("Maximum length of command line "
1345 			   "(%d bytes) exceeded\n"), MAXLINE);
1346 }
1347 
substitute_named_strings(char * line,int * subst)1348 int substitute_named_strings (char *line, int *subst)
1349 {
1350     char sname[VNAMELEN];
1351     int len = strlen(line);
1352     char *sub, *tmp, *s = line;
1353     int bs = 0, in_format = 0;
1354     int freeit;
1355     int i, n, m, err = 0;
1356 
1357     *subst = 0;
1358 
1359     if (*s == '#' || strchr(s, '@') == NULL) {
1360 	return 0;
1361     }
1362 
1363     if (!strncmp(line, "printf", 6) || !strncmp(line, "sprintf", 7)) {
1364 	s = strchr(s, '"');
1365 	if (s == NULL) {
1366 	    /* no format string */
1367 	    return E_PARSE;
1368 	}
1369 	s++;
1370 	in_format = 1;
1371     }
1372 
1373     i = s - line;
1374 
1375     while (*s && !err) {
1376 	if (in_format) {
1377 	    if (*s == '"' && (bs % 2 == 0)) {
1378 		/* reached end of (s)printf format string */
1379 		in_format = 0;
1380 	    }
1381 	    if (*s == '\\') {
1382 		bs++;
1383 	    } else {
1384 		bs = 0;
1385 	    }
1386 	}
1387 	if (*s == '@') {
1388 	    n = gretl_namechar_spn(s + 1);
1389 	    if (n > 0) {
1390 		if (n >= VNAMELEN) {
1391 		    n = VNAMELEN - 1;
1392 		}
1393 		*sname = '\0';
1394 		strncat(sname, s + 1, n);
1395 		freeit = 0;
1396 		sub = maybe_get_subst(sname, &n, in_format, &freeit);
1397 		if (sub != NULL) {
1398 		    m = strlen(sub);
1399 		    if (len + m + 2 >= MAXLINE) {
1400 			too_long();
1401 			err = 1;
1402 			break;
1403 		    }
1404 		    tmp = gretl_strdup(s + n + 1);
1405 		    if (tmp == NULL) {
1406 			err = E_ALLOC;
1407 		    } else {
1408 			strcpy(s, sub);
1409 			strcpy(s + m, tmp);
1410 			free(tmp);
1411 			len += m - (n + 1);
1412 			s += m - 1;
1413 			i += m - 1;
1414 			*subst = 1;
1415 		    }
1416 		    if (freeit) {
1417 			free(sub);
1418 		    }
1419 		}
1420 	    }
1421 	}
1422 	s++;
1423 	i++;
1424     }
1425 
1426     return err;
1427 }
1428