1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include "libgretl.h"
21 #include "libset.h"
22 #include "gretl_func.h"
23 #include "uservar.h"
24 #include "gretl_array.h"
25 #include "gretl_string_table.h"
26 #ifdef USE_CURL
27 # include "gretl_www.h"
28 #endif
29
30 #include <errno.h>
31 #include <glib.h>
32
33 enum {
34 ST_QUOTED = 1 << 0,
35 ST_ALLINTS = 1 << 1,
36 ST_ALLDBLS = 1 << 2
37 };
38
39 struct series_table_ {
40 int n_strs; /* number of strings in table */
41 char **strs; /* saved strings */
42 GHashTable *ht; /* hash table for quick lookup */
43 int flags; /* status flags (above) */
44 };
45
46 struct gretl_string_table_ {
47 int *cols_list; /* list of included columns */
48 series_table **cols; /* per-column tables (see above) */
49 char *extra; /* extra information, if any */
50 };
51
52 #define st_quoted(t) (t->flags & ST_QUOTED)
53 #define all_ints(t) (t->flags & ST_ALLINTS)
54 #define all_dbls(t) (t->flags & ST_ALLDBLS)
55 #define all_num(t) (t->flags & (ST_ALLINTS | ST_ALLDBLS))
56
series_table_alloc(void)57 static series_table *series_table_alloc (void)
58 {
59 series_table *st = malloc(sizeof *st);
60
61 if (st != NULL) {
62 st->strs = NULL;
63 st->n_strs = 0;
64 st->ht = g_hash_table_new(g_str_hash, g_str_equal);
65 st->flags = 0;
66 }
67
68 return st;
69 }
70
gretl_string_table_alloc(void)71 static gretl_string_table *gretl_string_table_alloc (void)
72 {
73 gretl_string_table *gst = malloc(sizeof *gst);
74
75 if (gst != NULL) {
76 gst->cols_list = NULL;
77 gst->cols = NULL;
78 gst->extra = NULL;
79 }
80
81 return gst;
82 }
83
84 /**
85 * gretl_string_table_new:
86 * @list: list of series IDs whose values are to be
87 * given a string representation, or NULL.
88 *
89 * These values in @list should correspond to the 0-based indices
90 * of the series in question within the dataset. For example,
91 * if strings are to be recorded for variables 2, 5 and 10 the
92 * @list argument would be {3, 2, 5, 10}. If NULL is passed for
93 * @list the return value is an initialized, empty string table.
94 *
95 * Returns: pointer to a newly allocated string table or NULL
96 * on failure.
97 */
98
gretl_string_table_new(const int * list)99 gretl_string_table *gretl_string_table_new (const int *list)
100 {
101 gretl_string_table *gst;
102 int ncols = 0;
103 int err = 0;
104
105 gst = gretl_string_table_alloc();
106 if (gst == NULL) {
107 return NULL;
108 }
109
110 if (list != NULL && list[0] > 0) {
111 gst->cols_list = gretl_list_copy(list);
112 if (gst->cols_list == NULL) {
113 err = E_ALLOC;
114 } else {
115 ncols = list[0];
116 }
117 }
118
119 if (ncols > 0) {
120 gst->cols = malloc(ncols * sizeof *gst->cols);
121 if (gst->cols == NULL) {
122 err = E_ALLOC;
123 } else {
124 int i, j;
125
126 for (i=0; i<ncols && !err; i++) {
127 gst->cols[i] = series_table_alloc();
128 if (gst->cols[i] == NULL) {
129 err = E_ALLOC;
130 for (j=0; j<i; j++) {
131 free(gst->cols[j]);
132 }
133 free(gst->cols);
134 }
135 }
136 }
137 }
138
139 if (err) {
140 free(gst->cols_list);
141 free(gst);
142 gst = NULL;
143 }
144
145 return gst;
146 }
147
series_table_get_index(const series_table * st,const char * s)148 static int series_table_get_index (const series_table *st,
149 const char *s)
150 {
151 gpointer p = g_hash_table_lookup(st->ht, s);
152 int ret = 0;
153
154 if (p != NULL) {
155 ret = GPOINTER_TO_INT(p);
156 }
157
158 return ret;
159 }
160
161 /**
162 * series_table_get_value:
163 * @st: a gretl series table.
164 * @s: the string to look up.
165 *
166 * Returns: the numerical value associated with @s in the
167 * given series table, or #NADBL in case there is no match.
168 */
169
series_table_get_value(series_table * st,const char * s)170 double series_table_get_value (series_table *st, const char *s)
171 {
172 int k = series_table_get_index(st, s);
173
174 return (k > 0)? (double) k : NADBL;
175 }
176
177 /**
178 * series_table_get_string:
179 * @st: a gretl series table.
180 * @val: the numerical value to look up.
181 *
182 * Returns: the string associated with @val in the
183 * given series table, or NULL in case there is no match.
184 */
185
series_table_get_string(series_table * st,double val)186 const char *series_table_get_string (series_table *st, double val)
187 {
188 const char *ret = NULL;
189
190 if (!na(val)) {
191 int k = (int) lrint(val);
192
193 if (k > 0 && k <= st->n_strs) {
194 ret = st->strs[k-1];
195 }
196 }
197
198 return ret;
199 }
200
201 /**
202 * series_table_map:
203 * @st_from: gretl series table.
204 * @st_to: gretl series table.
205 *
206 * Constructs a mapping from the integer codes in @st_from
207 * to those in @st_to. For example, if the string "foo"
208 * has code 3 in @st_from and code 12 in @st_to, then
209 * element 3 in the mapping array will have value 12.
210 * For any strings in @st_from that are not matched
211 * in @st_to, the associated element of the map is set
212 * to -1.
213 *
214 * Element 0 of the map holds the number of following
215 * elements, which is the same as the number of strings in
216 * @st_from.
217 *
218 * Returns: allocated array of int or NULL in case of failure.
219 */
220
series_table_map(series_table * st_from,series_table * st_to)221 int *series_table_map (series_table *st_from, series_table *st_to)
222 {
223 int *map = NULL;
224 int n = st_from->n_strs;
225
226 map = gretl_list_new(n);
227
228 if (map != NULL) {
229 const char *s1;
230 int i, i2;
231
232 for (i=0; i<n; i++) {
233 s1 = st_from->strs[i];
234 i2 = series_table_get_index(st_to, s1);
235 map[i+1] = i2 == 0 ? -1 : i2;
236 }
237 }
238
239 return map;
240 }
241
242 /**
243 * series_table_get_strings:
244 * @st: a gretl series table.
245 * @n_strs: location to receive the number of strings.
246 *
247 * Returns: the array of strings associated with @st. These
248 * should not be modified in any way.
249 */
250
series_table_get_strings(series_table * st,int * n_strs)251 char **series_table_get_strings (series_table *st, int *n_strs)
252 {
253 if (st != NULL) {
254 *n_strs = st->n_strs;
255 return st->strs;
256 } else {
257 return NULL;
258 }
259 }
260
series_table_get_n_strings(series_table * st)261 int series_table_get_n_strings (series_table *st)
262 {
263 if (st != NULL) {
264 return st->n_strs;
265 } else {
266 return 0;
267 }
268 }
269
get_unquoted(const char * s)270 static char *get_unquoted (const char *s)
271 {
272 char *tmp = NULL;
273 int n = strlen(s);
274
275 if (s[n-1] == s[0]) {
276 tmp = gretl_strndup(s+1, n-2);
277 }
278
279 return tmp;
280 }
281
282 /**
283 * series_table_add_string:
284 * @st: a gretl series table.
285 * @s: new string to add.
286 *
287 * Returns: the index of the new string within the table, or
288 * -1 on failure.
289 */
290
series_table_add_string(series_table * st,const char * s)291 int series_table_add_string (series_table *st, const char *s)
292 {
293 char *tmp = NULL;
294 int n, err;
295
296 if (*s == '"' || *s == '\'') {
297 tmp = get_unquoted(s);
298 }
299
300 if (tmp != NULL) {
301 st->flags |= ST_QUOTED;
302 err = strings_array_add(&st->strs, &st->n_strs, tmp);
303 free(tmp);
304 } else {
305 err = strings_array_add(&st->strs, &st->n_strs, s);
306 }
307
308 if (err) {
309 n = -1;
310 } else {
311 n = st->n_strs;
312 g_hash_table_insert(st->ht, (gpointer) st->strs[n-1],
313 GINT_TO_POINTER(n));
314 }
315
316 return n;
317 }
318
series_table_new(char ** strs,int n_strs,int * err)319 series_table *series_table_new (char **strs, int n_strs, int *err)
320 {
321 series_table *st = series_table_alloc();
322 int i;
323
324 if (st == NULL) {
325 *err = E_ALLOC;
326 } else {
327 st->n_strs = n_strs;
328 st->strs = strs;
329 for (i=0; i<n_strs; i++) {
330 if (st->strs[i] == NULL) {
331 fprintf(stderr, "series_table_new: str %d is NULL\n", i);
332 *err = E_DATA;
333 } else {
334 g_hash_table_insert(st->ht, (gpointer) st->strs[i],
335 GINT_TO_POINTER(i+1));
336 }
337 }
338 }
339
340 return st;
341 }
342
series_table_copy(series_table * st)343 series_table *series_table_copy (series_table *st)
344 {
345 series_table *ret = NULL;
346
347 if (st != NULL) {
348 ret = series_table_alloc();
349 }
350
351 if (ret != NULL) {
352 char **S = strings_array_dup(st->strs, st->n_strs);
353 int i;
354
355 if (S == NULL) {
356 series_table_destroy(ret);
357 ret = NULL;
358 } else {
359 ret->n_strs = st->n_strs;
360 ret->strs = S;
361 for (i=0; i<ret->n_strs; i++) {
362 g_hash_table_insert(ret->ht, (gpointer) ret->strs[i],
363 GINT_TO_POINTER(i+1));
364 }
365 }
366 }
367
368 return ret;
369 }
370
371 static series_table *
gretl_string_table_add_column(gretl_string_table * gst,int colnum)372 gretl_string_table_add_column (gretl_string_table *gst, int colnum)
373 {
374 series_table **cols;
375 int *newlist;
376 int n, err = 0;
377
378 newlist = gretl_list_append_term(&gst->cols_list, colnum);
379 if (newlist == NULL) {
380 return NULL;
381 }
382
383 n = gst->cols_list[0];
384
385 cols = realloc(gst->cols, n * sizeof *cols);
386 if (cols == NULL) {
387 err = E_ALLOC;
388 } else {
389 gst->cols = cols;
390 cols[n-1] = series_table_alloc();
391 if (cols[n-1] == NULL) {
392 err = E_ALLOC;
393 }
394 }
395
396 if (err) {
397 gst->cols_list[0] -= 1;
398 return NULL;
399 } else {
400 return cols[n-1];
401 }
402 }
403
404 /**
405 * gretl_string_table_index:
406 * @gst: a gretl string table.
407 * @s: the string to look up or add.
408 * @col: index of the column to be accessed or created.
409 * @addcol: non-zero to indicate that a column should be
410 * added if it's not already present.
411 * @prn: gretl printer (or %NULL).
412 *
413 * This function has two main uses: for lookup in the context of
414 * a completed string table, or for constructing such a
415 * table (with @addcol non-zero). The returned index reflects
416 * any additions to the table that may be required (if column
417 * @col does not already exist, or if string @s is not already
418 * stored for column @col).
419 *
420 * Returns: the 1-based index of @s within the column of
421 * @st that has index @col, if available, otherwise 0.
422 */
423
424 int
gretl_string_table_index(gretl_string_table * gst,const char * s,int col,int addcol,PRN * prn)425 gretl_string_table_index (gretl_string_table *gst, const char *s,
426 int col, int addcol, PRN *prn)
427 {
428 series_table *st = NULL;
429 char *tmp = NULL;
430 int i, idx = 0;
431
432 if (gst == NULL) {
433 return idx;
434 }
435
436 if (gst->cols_list != NULL) {
437 for (i=1; i<=gst->cols_list[0]; i++) {
438 if (gst->cols_list[i] == col) {
439 st = gst->cols[i-1];
440 break;
441 }
442 }
443 }
444
445 if (*s == '"') {
446 tmp = get_unquoted(s);
447 }
448
449 if (st != NULL) {
450 /* there's a table for this column already */
451 idx = series_table_get_index(st, tmp != NULL ? tmp : s);
452 } else if (addcol) {
453 /* no table for this column yet: start one now */
454 st = gretl_string_table_add_column(gst, col);
455 if (st != NULL) {
456 pprintf(prn, _("variable %d: translating from strings to "
457 "code numbers\n"), col);
458 }
459 }
460
461 if (idx == 0 && st != NULL) {
462 idx = series_table_add_string(st, s);
463 }
464
465 free(tmp);
466
467 return idx;
468 }
469
470 /* Used in the context of deletion of "empty" variables from
471 an imported dataset: the index of a given "column" in
472 a string table is adjusted to match the new position of
473 the variable in question.
474 */
475
gretl_string_table_reset_column_id(gretl_string_table * gst,int oldid,int newid)476 int gretl_string_table_reset_column_id (gretl_string_table *gst,
477 int oldid, int newid)
478 {
479 if (gst != NULL) {
480 int i;
481
482 for (i=1; i<=gst->cols_list[0]; i++) {
483 if (gst->cols_list[i] == oldid) {
484 gst->cols_list[i] = newid;
485 return 0;
486 }
487 }
488 }
489
490 return E_DATA;
491 }
492
gretl_string_table_detach_col(gretl_string_table * gst,int col)493 series_table *gretl_string_table_detach_col (gretl_string_table *gst,
494 int col)
495 {
496 series_table *st = NULL;
497
498 if (gst != NULL) {
499 int pos = in_gretl_list(gst->cols_list, col);
500 int i, n = gst->cols_list[0];
501
502 if (pos > 0) {
503 st = gst->cols[pos-1];
504 for (i=pos-1; i<n-1; i++) {
505 gst->cols[i] = gst->cols[i+1];
506 }
507 gst->cols[n-1] = NULL;
508 gretl_list_delete_at_pos(gst->cols_list, pos);
509 }
510 }
511
512 return st;
513 }
514
in_string_table(gretl_string_table * gst,int id)515 int in_string_table (gretl_string_table *gst, int id)
516 {
517 if (gst != NULL) {
518 return in_gretl_list(gst->cols_list, id);
519 } else {
520 return 0;
521 }
522 }
523
string_table_copy_list(gretl_string_table * gst)524 int *string_table_copy_list (gretl_string_table *gst)
525 {
526 if (gst != NULL) {
527 return gretl_list_copy(gst->cols_list);
528 } else {
529 return NULL;
530 }
531 }
532
string_table_replace_list(gretl_string_table * gst,int * newlist)533 int string_table_replace_list (gretl_string_table *gst,
534 int *newlist)
535 {
536 if (gst != NULL) {
537 /* FIXME pruning? */
538 free(gst->cols_list);
539 gst->cols_list = newlist;
540 }
541
542 return E_DATA;
543 }
544
545 /**
546 * series_table_destroy:
547 * @st: series string table.
548 *
549 * Frees all resources associated with @st.
550 */
551
series_table_destroy(series_table * st)552 void series_table_destroy (series_table *st)
553 {
554 if (st != NULL) {
555 strings_array_free(st->strs, st->n_strs);
556 if (st->ht != NULL) {
557 g_hash_table_destroy(st->ht);
558 }
559 free(st);
560 }
561 }
562
563 /**
564 * gretl_string_table_destroy:
565 * @gst: gretl string table.
566 *
567 * Frees all resources associated with @gst.
568 */
569
gretl_string_table_destroy(gretl_string_table * gst)570 void gretl_string_table_destroy (gretl_string_table *gst)
571 {
572 int i, ncols;
573
574 if (gst == NULL) return;
575
576 ncols = (gst->cols_list != NULL)? gst->cols_list[0] : 0;
577
578 for (i=0; i<ncols; i++) {
579 series_table_destroy(gst->cols[i]);
580 }
581 free(gst->cols);
582
583 free(gst->cols_list);
584
585 if (gst->extra != NULL) {
586 free(gst->extra);
587 }
588
589 free(gst);
590 }
591
592 /* Given a series_table in which all the strings are just
593 representations of integers, write the integer values
594 into the series and destroy the table, while marking
595 the series as "coded". Or, if all the strings are valid
596 doubles, convert to numeric.
597 */
598
series_commute_string_table(DATASET * dset,int i,series_table * st)599 static void series_commute_string_table (DATASET *dset, int i,
600 series_table *st)
601 {
602 if (dset != NULL && i > 0 && i < dset->v) {
603 const char *s;
604 double val;
605 int t;
606
607 for (t=0; t<dset->n; t++) {
608 val = dset->Z[i][t];
609 if (!na(val)) {
610 s = series_table_get_string(st, val);
611 if (all_ints(st)) {
612 dset->Z[i][t] = (double) atoi(s);
613 } else {
614 dset->Z[i][t] = atof(s);
615 }
616 }
617 }
618 if (all_ints(st)) {
619 series_set_flag(dset, i, VAR_DISCRETE);
620 if (!gretl_isdummy(0, dset->n - 1, dset->Z[i])) {
621 series_set_flag(dset, i, VAR_CODED);
622 }
623 } else {
624 series_unset_flag(dset, i, VAR_DISCRETE);
625 }
626 series_table_destroy(st);
627 }
628 }
629
630 /**
631 * gretl_string_table_print:
632 * @gst: gretl string table.
633 * @dset: dataset information (for names of variables).
634 * @fname: name of the datafile to which the table pertains.
635 * @prn: gretl printer (or %NULL).
636 *
637 * In basic usage, prints table @gst to string_table.txt in
638 * the user's working directory. However, if one or more of
639 * the series referenced by @gst are deemed to be integer codes
640 * or misclassified numeric data, their "series tables" are
641 * commuted into numeric form. If all series are handled in
642 * this way, the string_table is not printed.
643 *
644 * Returns: 0 on success, non-zero on error.
645 */
646
gretl_string_table_print(gretl_string_table * gst,DATASET * dset,const char * fname,PRN * prn)647 int gretl_string_table_print (gretl_string_table *gst, DATASET *dset,
648 const char *fname, PRN *prn)
649 {
650 series_table *st;
651 const char *fshort;
652 char stname[MAXLEN];
653 FILE *fp = NULL;
654 int i, j, ncols = 0;
655 int n_strvars = 0;
656 int err = 0;
657
658 if (gst == NULL) {
659 return E_DATA;
660 }
661
662 ncols = (gst->cols_list != NULL)? gst->cols_list[0] : 0;
663 n_strvars = ncols;
664
665 /* first examine the string table for numeric codings */
666 for (i=0; i<ncols; i++) {
667 st = gst->cols[i];
668 if (st == NULL || all_num(st)) {
669 n_strvars--;
670 }
671 }
672
673 if (n_strvars > 0) {
674 strcpy(stname, "string_table.txt");
675 gretl_path_prepend(stname, gretl_workdir());
676
677 fp = gretl_fopen(stname, "w");
678 if (fp == NULL) {
679 return E_FOPEN;
680 }
681
682 fshort = strrslash(fname);
683 if (fshort != NULL) {
684 fprintf(fp, "%s\n", fshort + 1);
685 } else {
686 fprintf(fp, "%s\n", fname);
687 }
688
689 fputc('\n', fp);
690 fputs(_("One or more non-numeric variables were found.\n"
691 "These variables have been given numeric codes as follows.\n\n"), fp);
692 if (gst->extra != NULL) {
693 fputs(_("In addition, some mappings from numerical values to string\n"
694 "labels were found, and are printed below.\n\n"), fp);
695 }
696 }
697
698 for (i=0; i<ncols; i++) {
699 int vi = gst->cols_list[i+1];
700
701 st = gst->cols[i];
702 if (fp != NULL && st != NULL && !all_num(st)) {
703 if (i > 0) {
704 fputc('\n', fp);
705 }
706 fprintf(fp, _("String code table for variable %d (%s):\n"),
707 vi, dset->varname[vi]);
708 for (j=0; j<st->n_strs; j++) {
709 fprintf(fp, "%3d = '%s'\n", j+1, st->strs[j]);
710 }
711 }
712 if (dset->varinfo != NULL) {
713 if (all_num(st)) {
714 pputs(prn, "commuting series table\n");
715 series_commute_string_table(dset, vi, st);
716 } else {
717 series_attach_string_table(dset, vi, st);
718 }
719 gst->cols[i] = NULL;
720 }
721 }
722
723 if (fp != NULL) {
724 if (gst->extra != NULL) {
725 fputs(gst->extra, fp);
726 }
727 pprintf(prn, _("String code table written to\n %s\n"), stname);
728 fclose(fp);
729 set_string_table_written();
730 }
731
732 return err;
733 }
734
string_is_double(const char * s)735 static int string_is_double (const char *s)
736 {
737 char *test;
738
739 errno = 0;
740 strtod(s, &test);
741 return errno == 0 && *test == '\0';
742 }
743
744 /**
745 * gretl_string_table_validate:
746 * @gst: gretl string table.
747 * @opt: may include OPT_S to indicate that the string
748 * table was constructed from a spreadsheet file, in which
749 * case certain cells may be explicitly marked as holding
750 * strings, which information may be not be reliable.
751 *
752 * Checks that the "string values" in @gst are not in fact
753 * undigested quasi-numerical values. We run this on
754 * imported CSV data to ensure we don't produce
755 * misleading results. In the spreadsheet case we're on
756 * the lookout for purely numerical data wrongly identified
757 * as strings; we don't treat that as fatal but mark the
758 * series table(s) for conversion to numeric.
759 *
760 * Returns: 0 on success, non-zero on error.
761 */
762
gretl_string_table_validate(gretl_string_table * gst,gretlopt opt)763 int gretl_string_table_validate (gretl_string_table *gst,
764 gretlopt opt)
765 {
766 const char *test = "0123456789.,";
767 int ssheet = (opt & OPT_S);
768 int i, ncols = 0;
769 int err = 0;
770
771 if (gst != NULL && gst->cols_list != NULL) {
772 ncols = gst->cols_list[0];
773 }
774
775 for (i=0; i<ncols; i++) {
776 series_table *st = gst->cols[i];
777 const char *s;
778 int nint = 0;
779 int ndbl = 0;
780 int j, myerr = E_DATA;
781
782 if (st_quoted(st)) {
783 myerr = 0;
784 }
785
786 for (j=0; j<st->n_strs; j++) {
787 s = st->strs[j];
788 if (st_quoted(st)) {
789 if (integer_string(s)) {
790 nint++;
791 }
792 } else {
793 /* not quoted */
794 if (ssheet && (*s == '\0' || string_is_double(s))) {
795 /* could really be numeric? (2020-12-25) */
796 ndbl++;
797 continue;
798 } else if (*s == '-' || *s == '+') {
799 s++;
800 }
801 if (strspn(s, test) < strlen(s)) {
802 /* not quasi-numeric */
803 myerr = 0;
804 break;
805 }
806 }
807 }
808
809 if (nint == st->n_strs) {
810 /* treat as integer codes */
811 st->flags |= ST_ALLINTS;
812 } else if (ndbl == st->n_strs) {
813 /* all really numeric */
814 st->flags |= ST_ALLDBLS;
815 myerr = 0;
816 }
817
818 if (myerr) {
819 err = myerr;
820 break;
821 }
822 }
823
824 return err;
825 }
826
827 /**
828 * gretl_string_table_save:
829 * @gst: gretl string table.
830 * @dset: dataset information (for names of variables).
831 *
832 * Attaches the content of @gst to @dset.
833 *
834 * Returns: 0 on success, non-zero on error.
835 */
836
gretl_string_table_save(gretl_string_table * gst,DATASET * dset)837 int gretl_string_table_save (gretl_string_table *gst, DATASET *dset)
838 {
839 series_table *st;
840 int i, vi, ncols = 0;
841
842 if (gst == NULL || dset->varinfo == NULL) {
843 return E_DATA;
844 }
845
846 ncols = (gst->cols_list != NULL)? gst->cols_list[0] : 0;
847
848 for (i=0; i<ncols; i++) {
849 st = gst->cols[i];
850 if (st != NULL) {
851 vi = gst->cols_list[i+1];
852 st = gst->cols[i];
853 series_attach_string_table(dset, vi, st);
854 gst->cols[i] = NULL;
855 }
856 }
857
858 return 0;
859 }
860
861 /**
862 * gretl_string_table_add_extra:
863 * @gst: gretl string table.
864 * @prn: gretl printer.
865 *
866 * Steals the printing buffer from @prn and adds it to @gst.
867 * The buffer will be appended when @gst is printed via
868 * gretl_string_table_print().
869 */
870
gretl_string_table_add_extra(gretl_string_table * gst,PRN * prn)871 void gretl_string_table_add_extra (gretl_string_table *gst, PRN *prn)
872 {
873 if (gst != NULL && prn != NULL) {
874 gst->extra = gretl_print_steal_buffer(prn);
875 }
876 }
877
878 /* apparatus for built-in strings */
879
880 struct built_in_string_ {
881 char name[VNAMELEN];
882 gchar *s;
883 };
884
885 typedef struct built_in_string_ built_in_string;
886
887 static built_in_string built_ins[] = {
888 { "gretldir", NULL },
889 { "dotdir", NULL },
890 { "workdir", NULL },
891 { "gnuplot", NULL },
892 { "x12a", NULL },
893 { "x12adir", NULL },
894 { "tramo", NULL },
895 { "tramodir", NULL },
896 { "seats", NULL },
897 { "pkgdir", NULL },
898 { "lang", NULL },
899 { "logfile", NULL }
900 };
901
builtin_strings_cleanup(void)902 void builtin_strings_cleanup (void)
903 {
904 int i, n = sizeof built_ins / sizeof built_ins[0];
905
906 for (i=0; i<n; i++) {
907 g_free(built_ins[i].s);
908 }
909 }
910
911 /**
912 * gretl_insert_builtin_string:
913 * @name: the name of the string to be added or replaced.
914 * @s: the value for this string variable.
915 *
916 * Inserts value @s for string @name in gretl's table
917 * of built-in string variables.
918 */
919
gretl_insert_builtin_string(const char * name,const char * s)920 void gretl_insert_builtin_string (const char *name, const char *s)
921 {
922 int i, n = sizeof built_ins / sizeof built_ins[0];
923 int m, gui = gretl_in_gui_mode();
924
925 for (i=0; i<n; i++) {
926 if (!strcmp(name, built_ins[i].name)) {
927 g_free(built_ins[i].s);
928 if (s == NULL) {
929 built_ins[i].s = NULL;
930 } else if (gui && !g_utf8_validate(s, -1, NULL)) {
931 /* handle non-ASCII Windows paths */
932 gsize bytes;
933 gchar *u;
934
935 u = g_locale_to_utf8(s, -1, NULL, &bytes, NULL);
936 if (u != NULL) {
937 m = strlen(u);
938 if (u[m-1] == SLASH) {
939 u[m-1] = '\0';
940 }
941 }
942 built_ins[i].s = u;
943 } else {
944 m = strlen(s);
945 if (s[m-1] == SLASH) {
946 /* drop trailing dir separator for paths */
947 built_ins[i].s = g_strndup(s, m - 1);
948 } else {
949 built_ins[i].s = g_strdup(s);
950 }
951 }
952 return;
953 }
954 }
955 }
956
get_built_in_string_by_name(const char * name)957 char *get_built_in_string_by_name (const char *name)
958 {
959 int i, n = sizeof built_ins / sizeof built_ins[0];
960
961 for (i=0; i<n; i++) {
962 if (!strcmp(name, built_ins[i].name)) {
963 char *s = built_ins[i].s;
964
965 return s != NULL ? s : "";
966 }
967 }
968
969 return NULL;
970 }
971
972 /* Try to recode the content of a local file or web resource
973 to UTF-8. Can be tricky since we don't know the original
974 encoding of the content.
975 */
976
recode_content(gchar * orig,const char * codeset,int * err)977 static gchar *recode_content (gchar *orig, const char *codeset,
978 int *err)
979 {
980 const gchar *charset = NULL;
981 GError *gerr = NULL;
982 gsize wrote = 0;
983 gchar *tr;
984
985 if (codeset != NULL) {
986 /* the user specified the source encoding */
987 tr = g_convert(orig, -1, "UTF-8", codeset,
988 NULL, &wrote, &gerr);
989 } else if (g_get_charset(&charset)) {
990 /* we're in a UTF-8 locale, so we know that
991 g_locale_to_utf8 won't do the job; so guess
992 the content is iso-8859-something?
993 */
994 tr = g_convert(orig, -1, "UTF-8", "ISO-8859-15",
995 NULL, &wrote, &gerr);
996 } else {
997 /* try assuming the material is in the locale
998 encoding */
999 tr = g_locale_to_utf8(orig, -1, NULL, &wrote, &gerr);
1000 if (gerr != NULL) {
1001 /* failed: try iso-8859-15? */
1002 g_error_free(gerr);
1003 gerr = NULL;
1004 tr = g_convert(orig, -1, "UTF-8", "ISO-8859-15",
1005 NULL, &wrote, &gerr);
1006 }
1007 }
1008
1009 if (gerr != NULL) {
1010 gretl_errmsg_set(gerr->message);
1011 *err = E_DATA;
1012 g_error_free(gerr);
1013 }
1014
1015 g_free(orig);
1016
1017 return tr;
1018 }
1019
shell_grab(const char * arg,char ** sout)1020 static int shell_grab (const char *arg, char **sout)
1021 {
1022 int err = 0;
1023
1024 if (arg == NULL || *arg == '\0') {
1025 return E_PARSE;
1026 }
1027
1028 if (!libset_get_bool(SHELL_OK)) {
1029 gretl_errmsg_set(_("The shell command is not activated."));
1030 return 1;
1031 }
1032
1033 gretl_shell_grab(arg, sout);
1034
1035 if (sout != NULL && *sout != NULL) {
1036 char *content = *sout;
1037
1038 if (!g_utf8_validate(content, -1, NULL)) {
1039 content = recode_content(content, NULL, &err);
1040 *sout = content;
1041 }
1042
1043 if (content != NULL) {
1044 /* trim trailing newline */
1045 int n = strlen(content);
1046
1047 if (content[n-1] == '\n') {
1048 content[n-1] = '\0';
1049 }
1050 }
1051 }
1052
1053 return err;
1054 }
1055
gretl_backtick(const char * arg,int * err)1056 char *gretl_backtick (const char *arg, int *err)
1057 {
1058 char *val = NULL;
1059
1060 *err = shell_grab(arg, &val);
1061
1062 if (!*err && val == NULL) {
1063 val = gretl_strdup("");
1064 if (val == NULL) {
1065 *err = E_ALLOC;
1066 }
1067 }
1068
1069 return val;
1070 }
1071
gretl_getenv(const char * key,int * defined,int * err)1072 char *gretl_getenv (const char *key, int *defined, int *err)
1073 {
1074 char *test = getenv(key);
1075 char *val = NULL;
1076
1077 if (test == NULL) {
1078 *defined = 0;
1079 val = gretl_strdup("");
1080 } else {
1081 *defined = 1;
1082 val = gretl_strdup(test);
1083 }
1084
1085 if (val == NULL) {
1086 *err = E_ALLOC;
1087 }
1088
1089 return val;
1090 }
1091
retrieve_date_string(int t,const DATASET * dset,int * err)1092 char *retrieve_date_string (int t, const DATASET *dset, int *err)
1093 {
1094 char *ret = NULL;
1095
1096 if (t <= 0 || t > dset->n) {
1097 *err = E_DATA;
1098 } else if (dset->S != NULL) {
1099 ret = gretl_strdup(dset->S[t-1]);
1100 if (ret == NULL) {
1101 *err = E_ALLOC;
1102 }
1103 } else {
1104 char datestr[OBSLEN] = {0};
1105
1106 ntolabel(datestr, t - 1, dset);
1107 ret = gretl_strdup(datestr);
1108 if (ret == NULL) {
1109 *err = E_ALLOC;
1110 }
1111 }
1112
1113 return ret;
1114 }
1115
1116 /* returns a gretl_array of strings on success */
1117
retrieve_date_strings(const gretl_vector * v,const DATASET * dset,int * err)1118 gretl_array *retrieve_date_strings (const gretl_vector *v,
1119 const DATASET *dset,
1120 int *err)
1121 {
1122 gretl_array *ret = NULL;
1123 char *s = NULL;
1124 int i, t, n;
1125
1126 n = gretl_vector_get_length(v);
1127 if (n == 0) {
1128 *err = E_INVARG;
1129 } else {
1130 ret = gretl_array_new(GRETL_TYPE_STRINGS, n, err);
1131 }
1132
1133 for (i=0; i<n && !*err; i++) {
1134 t = gretl_int_from_double(v->val[i], err);
1135 if (!*err) {
1136 s = retrieve_date_string(t, dset, err);
1137 }
1138 if (!*err) {
1139 gretl_array_set_data(ret, i, s);
1140 }
1141 }
1142
1143 if (*err && ret != NULL) {
1144 gretl_array_destroy(ret);
1145 ret = NULL;
1146 }
1147
1148 return ret;
1149 }
1150
is_web_resource(const char * s)1151 static int is_web_resource (const char *s)
1152 {
1153 if (!strncmp(s, "http://", 7) ||
1154 !strncmp(s, "https://", 8) ||
1155 !strncmp(s, "ftp://", 6)) {
1156 return 1;
1157 } else {
1158 return 0;
1159 }
1160 }
1161
gzipped_file_get_content(const char * fname,int * err)1162 static gchar *gzipped_file_get_content (const char *fname,
1163 int *err)
1164 {
1165 gzFile fz = gretl_gzopen(fname, "rb");
1166 gchar *ret = NULL;
1167
1168 if (fz == NULL) {
1169 *err = E_FOPEN;
1170 } else {
1171 size_t len = 0;
1172 int chk;
1173
1174 while (gzgetc(fz) > 0) {
1175 len++;
1176 }
1177 if (len > 0) {
1178 gzrewind(fz);
1179 ret = g_try_malloc(len + 1);
1180 if (ret == NULL) {
1181 *err = E_ALLOC;
1182 } else {
1183 chk = gzread(fz, ret, len);
1184 if (chk <= 0) {
1185 *err = E_DATA;
1186 }
1187 ret[len] = '\0';
1188 }
1189 } else {
1190 ret = g_strdup("");
1191 }
1192 gzclose(fz);
1193 }
1194
1195 return ret;
1196 }
1197
regular_file_get_content(const char * fname,int * err)1198 static gchar *regular_file_get_content (const char *fname,
1199 int *err)
1200 {
1201 GError *gerr = NULL;
1202 gchar *ret = NULL;
1203 size_t len = 0;
1204 int done = 0;
1205
1206 #ifdef WIN32
1207 /* g_file_get_contents() requires a UTF-8 filename */
1208 if (!g_utf8_validate(fname, -1, NULL)) {
1209 gchar *fconv;
1210 gsize wrote = 0;
1211
1212 fconv = g_locale_to_utf8(fname, -1, NULL, &wrote, &gerr);
1213 if (fconv != NULL) {
1214 g_file_get_contents(fconv, &ret, &len, &gerr);
1215 g_free(fconv);
1216 }
1217 done = 1;
1218 }
1219 #endif
1220 if (!done) {
1221 g_file_get_contents(fname, &ret, &len, &gerr);
1222 }
1223
1224 if (gerr != NULL) {
1225 gretl_errmsg_set(gerr->message);
1226 *err = E_FOPEN;
1227 g_error_free(gerr);
1228 }
1229
1230 return ret;
1231 }
1232
retrieve_file_content(const char * fname,const char * codeset,int * err)1233 char *retrieve_file_content (const char *fname, const char *codeset,
1234 int *err)
1235 {
1236 char *ret = NULL;
1237 gchar *content = NULL;
1238 size_t len = 0;
1239 gssize sz;
1240
1241 if (fname == NULL || *fname == '\0') {
1242 *err = E_INVARG;
1243 } else if (is_web_resource(fname)) {
1244 #ifdef USE_CURL
1245 content = retrieve_public_file_as_buffer(fname, &len, err);
1246 #else
1247 gretl_errmsg_set(_("Internet access not supported"));
1248 *err = E_DATA;
1249 #endif
1250 } else {
1251 char fullname[FILENAME_MAX] = {0};
1252
1253 strncat(fullname, fname, FILENAME_MAX - 1);
1254 gretl_addpath(fullname, 0);
1255 if (is_gzipped(fullname)) {
1256 content = gzipped_file_get_content(fullname, err);
1257 } else {
1258 content = regular_file_get_content(fullname, err);
1259 }
1260 }
1261
1262 sz = (len > 0)? len : -1;
1263 if (content != NULL && !g_utf8_validate(content, sz, NULL)) {
1264 content = recode_content(content, codeset, err);
1265 }
1266
1267 if (content != NULL) {
1268 if (*err == 0) {
1269 ret = gretl_strdup(content);
1270 }
1271 g_free(content);
1272 }
1273
1274 return ret;
1275 }
1276
1277 /* inserting string into format portion of (s)printf command:
1278 double any backslashes to avoid breakage of Windows paths
1279 */
1280
mod_strdup(const char * s)1281 static char *mod_strdup (const char *s)
1282 {
1283 char *ret = NULL;
1284 int i, n = strlen(s);
1285 int bs = 0;
1286
1287 for (i=0; i<n; i++) {
1288 if (s[i] == '\\' && (i == n - 1 || s[i+1] != '\\')) {
1289 bs++;
1290 }
1291 }
1292
1293 ret = malloc(n + 1 + bs);
1294 if (ret == NULL) {
1295 return NULL;
1296 }
1297
1298 if (bs == 0) {
1299 strcpy(ret, s);
1300 } else {
1301 int j = 0;
1302
1303 for (i=0; i<n; i++) {
1304 if (s[i] == '\\' && (i == n - 1 || s[i+1] != '\\')) {
1305 ret[j++] = '\\';
1306 }
1307 ret[j++] = s[i];
1308 }
1309 ret[j] = '\0';
1310 }
1311
1312 return ret;
1313 }
1314
maybe_get_subst(char * name,int * n,int quoted,int * freeit)1315 static char *maybe_get_subst (char *name, int *n, int quoted,
1316 int *freeit)
1317 {
1318 char *s = NULL;
1319 int k = *n - 1;
1320 char *ret = NULL;
1321
1322 while (k >= 0) {
1323 s = (char *) get_string_by_name(name);
1324 if (s != NULL) {
1325 *n = k + 1;
1326 ret = s;
1327 break;
1328 }
1329 name[k--] = '\0';
1330 }
1331
1332 if (ret != NULL) {
1333 if (quoted && strchr(ret, '\\')) {
1334 ret = mod_strdup(ret);
1335 *freeit = 1;
1336 }
1337 }
1338
1339 return ret;
1340 }
1341
too_long(void)1342 static void too_long (void)
1343 {
1344 gretl_errmsg_sprintf(_("Maximum length of command line "
1345 "(%d bytes) exceeded\n"), MAXLINE);
1346 }
1347
substitute_named_strings(char * line,int * subst)1348 int substitute_named_strings (char *line, int *subst)
1349 {
1350 char sname[VNAMELEN];
1351 int len = strlen(line);
1352 char *sub, *tmp, *s = line;
1353 int bs = 0, in_format = 0;
1354 int freeit;
1355 int i, n, m, err = 0;
1356
1357 *subst = 0;
1358
1359 if (*s == '#' || strchr(s, '@') == NULL) {
1360 return 0;
1361 }
1362
1363 if (!strncmp(line, "printf", 6) || !strncmp(line, "sprintf", 7)) {
1364 s = strchr(s, '"');
1365 if (s == NULL) {
1366 /* no format string */
1367 return E_PARSE;
1368 }
1369 s++;
1370 in_format = 1;
1371 }
1372
1373 i = s - line;
1374
1375 while (*s && !err) {
1376 if (in_format) {
1377 if (*s == '"' && (bs % 2 == 0)) {
1378 /* reached end of (s)printf format string */
1379 in_format = 0;
1380 }
1381 if (*s == '\\') {
1382 bs++;
1383 } else {
1384 bs = 0;
1385 }
1386 }
1387 if (*s == '@') {
1388 n = gretl_namechar_spn(s + 1);
1389 if (n > 0) {
1390 if (n >= VNAMELEN) {
1391 n = VNAMELEN - 1;
1392 }
1393 *sname = '\0';
1394 strncat(sname, s + 1, n);
1395 freeit = 0;
1396 sub = maybe_get_subst(sname, &n, in_format, &freeit);
1397 if (sub != NULL) {
1398 m = strlen(sub);
1399 if (len + m + 2 >= MAXLINE) {
1400 too_long();
1401 err = 1;
1402 break;
1403 }
1404 tmp = gretl_strdup(s + n + 1);
1405 if (tmp == NULL) {
1406 err = E_ALLOC;
1407 } else {
1408 strcpy(s, sub);
1409 strcpy(s + m, tmp);
1410 free(tmp);
1411 len += m - (n + 1);
1412 s += m - 1;
1413 i += m - 1;
1414 *subst = 1;
1415 }
1416 if (freeit) {
1417 free(sub);
1418 }
1419 }
1420 }
1421 }
1422 s++;
1423 i++;
1424 }
1425
1426 return err;
1427 }
1428