1 /* Code for the "new tokenizer", which replaced the old command
2    parser in interact.c in September 2014, following the 1.9.92
3    release of gretl.
4 */
5 
/* Set CDEBUG to 1 to get command-level diagnostics on stderr
   (see gretl_cmd_clear) */
#define CDEBUG 0
/* Set TDEBUG to 1 to trace each token on stderr as it is added
   (see real_add_token) */
#define TDEBUG 0

/* allow deprecated "addobs" for "dataset addobs"? */
#define ALLOW_ADDOBS 1

/* experiment: allow statements separated by ';' */
#define SEMIC_TEST 0
14 
/* Per-command parsing flags: each enumerator is a distinct bit, so
   the values can be OR'd together in the command table.
*/
typedef enum {
    CI_LIST  = 0x000001, /* list may be present */
    CI_LLEN1 = 0x000002, /* list must contain exactly 1 member */
    CI_LLEN2 = 0x000004, /* list must contain exactly 2 members */
    CI_ORD1  = 0x000008, /* args start with order specifier */
    CI_ORD2  = 0x000010, /* args (may) end with order */
    CI_L1INT = 0x000020, /* first portion of list contains ints */
    CI_PARM1 = 0x000040, /* args start with (non-list) parameter */
    CI_PARM2 = 0x000080, /* may use second param */
    CI_FNAME = 0x000100, /* (first) parameter is filename */
    CI_EXPR  = 0x000200, /* uses "genr"-type expression */
    CI_VARGS = 0x000400, /* ends with varargs field */
    CI_EXTRA = 0x000800, /* uses some special "extra" feature */
    CI_ADHOC = 0x001000, /* needs special-purpose parser */
    CI_DOALL = 0x002000, /* operates on all series if no list given */
    CI_NOOPT = 0x004000, /* command never takes any options */
    CI_BLOCK = 0x008000, /* command starts a block */
    CI_FFORM = 0x010000, /* command also has function-form */
    CI_LCHK  = 0x020000, /* needs checking for "list" specials */
    CI_INFL  = 0x040000, /* command arglist "inflected" by options */
    CI_FCMIN = 0x080000, /* minimal (single word) flow control */
    CI_LGEN  = 0x100000, /* command generates a named list */
    CI_OBSOL = 0x200000  /* command is obsolete and therefore deprecated */
} CIFlags;
39 
/* One row of the command table: associates a command index with
   its command word and its parsing flags */
struct gretl_cmd {
    int cnum;          /* command index (ADD, ADF, ...) */
    const char *cword; /* command word, e.g. "add" */
    CIFlags flags;     /* zero or more CIFlags values, OR'd together */
};
45 
46 /* Note: the flags CI_EXPR, CI_VARGS and CI_ADHOC all indicate that
47    the command in question carries (or may carry) material that gets
48    passed on for parsing elsewhere (i.e. not in this translation
49    unit).
50 
51    In the case of CI_EXPR and CI_VARGS, this material (i) is a
52    strictly trailing portion of the command line and (ii) is destined
53    for handling via "genr". Such commands never take options (so the
54    CI_NOOPT flag would be redundant). The unparsed material can
55    therefore simply take the form of a const char pointer into the
56    command line (namely, the @vstart member of the CMD struct). The
57    CI_VARGS case differs from CI_EXPR in that the former requires the
58    parsing here of either one or two leading parameters; there are no
59    such parameters with CI_EXPR.
60 
61    CI_ADHOC is used for cases where a certain chunk of the command
62    line needs to be packaged up for specialized parsing
63    elsewhere. Options may follow the chunk in question; it's not a
64    strictly trailing portion of the line. The ad hoc material is
65    packaged into either the @param or @parm2 member of the CMD struct.
66 
67    CI_EXTRA is used where some (possibly optional) element of a
68    command does not fit neatly into the slots of @list, @param and
69    @parm2. The distinction between CI_ADHOC and CI_EXTRA is not
70    totally rigorous, but it seems to be useful to have both flags;
71    the idea is that CI_EXTRA is used when the "special" portion of
72    a command line is a limited subset of the line.
73 */
74 
/* The command table: command index, command word and parsing
   flags. The array is subscripted directly by command index (see
   command_get_flags) and its final entry carries index NC and a
   NULL command word. It is not const because
   check_for_shadowed_commands() may add CI_FFORM to the flags.
*/
static struct gretl_cmd gretl_cmds[] = {
    { 0,        "",         0},
    { ADD,      "add",      CI_LIST },
    { ADF,      "adf",      CI_ORD1 | CI_LIST },
    { ANOVA,    "anova",    CI_LIST },
    { APPEND,   "append",   CI_PARM1 | CI_FNAME },
    { AR,       "ar",       CI_LIST | CI_L1INT },
    { AR1,      "ar1",      CI_LIST },
    { ARCH,     "arch",     CI_ORD1 | CI_LIST },
    { ARMA,     "arima",    CI_LIST | CI_L1INT },
    { BDS,      "bds",      CI_ORD1 | CI_LIST | CI_LLEN1 },
    { BIPROBIT, "biprobit", CI_LIST },
    { BKW,      "bkw",      0 },
    { BREAK,    "break",    CI_NOOPT | CI_FCMIN },
    { BXPLOT,   "boxplot",  CI_LIST | CI_EXTRA | CI_INFL },
    { CHOW,     "chow",     CI_PARM1 },
    { CLEAR,    "clear",    0 },
    { COEFFSUM, "coeffsum", CI_LIST  },
    { COINT,    "coint",    CI_ORD1 | CI_LIST },
    { COINT2,   "johansen", CI_ORD1 | CI_LIST },
    { CORR,     "corr",     CI_LIST | CI_DOALL },
    { CORRGM,   "corrgm",   CI_LIST | CI_LLEN1 | CI_ORD2 },
    { CUSUM,    "cusum",    0 },
    { DATA,     "data",     CI_ADHOC }, /* special: needs whole line */
    { DATAMOD,  "dataset",  CI_PARM1 | CI_LIST | CI_PARM2 },
    { DELEET,   "delete",   CI_PARM1 | CI_INFL }, /* or CI_LIST */
    { DIFF,     "diff",     CI_LIST },
    { DIFFTEST, "difftest", CI_LIST | CI_LLEN2 },
    { DISCRETE, "discrete", CI_LIST },
    { DPANEL  , "dpanel",   CI_LIST | CI_L1INT },
    { DUMMIFY,  "dummify",  CI_LIST },
    { DURATION, "duration", CI_LIST },
    { ELIF,     "elif",     CI_EXPR },
    { ELSE,     "else",     CI_NOOPT | CI_FCMIN },
    { END,      "end",      CI_PARM1 },
    { ENDIF,    "endif",    CI_NOOPT | CI_FCMIN },
    { ENDLOOP,  "endloop",  CI_NOOPT | CI_FCMIN },
    { EQNPRINT, "eqnprint", 0 }, /* special, handled later */
    { EQUATION, "equation", CI_LIST },
    { ESTIMATE, "estimate", CI_PARM1 | CI_PARM2 }, /* params optional */
    { EVAL,     "eval",     CI_EXPR },
    { FCAST,    "fcast",    CI_ADHOC },
    { FLUSH,    "flush",    CI_NOOPT },
    { FOREIGN,  "foreign",  CI_PARM1 | CI_BLOCK },
    { FRACTINT, "fractint", CI_LIST | CI_LLEN1 | CI_ORD2 },
    { FREQ,     "freq",     CI_LIST | CI_LLEN1 },
    { FUNC,     "function", CI_ADHOC | CI_NOOPT | CI_BLOCK },
    { FUNCERR,  "funcerr",  CI_PARM1 | CI_NOOPT },
    { GARCH,    "garch",    CI_LIST | CI_L1INT },
    { GENR,     "genr",     CI_EXPR },
    { GMM,      "gmm",      CI_EXPR | CI_BLOCK },
    { GNUPLOT,  "gnuplot",  CI_LIST | CI_EXTRA | CI_INFL },
    { GRAPHPG,  "graphpg",  CI_PARM1 | CI_PARM2 }, /* params optional */
    { HECKIT,   "heckit",   CI_LIST },
    { HELP,     "help",     CI_PARM1 },
    { HFPLOT,   "hfplot",   CI_LIST | CI_EXTRA },
    { HSK,      "hsk",      CI_LIST },
    { HURST,    "hurst",    CI_LIST | CI_LLEN1 },
    { IF,       "if",       CI_EXPR },
    { INCLUDE,  "include",  CI_PARM1 | CI_FNAME },
    { INFO,     "info",     CI_NOOPT },
    { INTREG,   "intreg",   CI_LIST },
    { JOIN,     "join",     CI_PARM1 | CI_FNAME | CI_EXTRA },
    { KPSS,     "kpss",     CI_ORD1 | CI_LIST },
    { LABELS,   "labels",   CI_LIST | CI_DOALL },
    { LAD,      "lad",      CI_LIST },
    { LAGS,     "lags",     CI_ORD1 | CI_LIST },
    { LDIFF,    "ldiff",    CI_LIST | CI_NOOPT },
    { LEVERAGE, "leverage", 0 },
    { LEVINLIN, "levinlin", CI_PARM1 | CI_LIST | CI_LLEN1 },
    { LOGISTIC, "logistic", CI_LIST },
    { LOGIT,    "logit",    CI_LIST },
    { LOGS,     "logs",     CI_LIST | CI_NOOPT },
    { LOOP,     "loop",     CI_ADHOC }, /* ? */
    { MAHAL,    "mahal",    CI_LIST },
    { MAKEPKG,  "makepkg",  CI_PARM1 },
    { MARKERS,  "markers",  0 },
    { MEANTEST, "meantest", CI_LIST | CI_LLEN2 },
    { MIDASREG, "midasreg", CI_LIST },
    { MLE,      "mle",      CI_EXPR | CI_BLOCK },
    { MODELTAB, "modeltab", CI_PARM1 | CI_INFL },
    { MODPRINT, "modprint", CI_PARM1 | CI_PARM2 | CI_EXTRA },
    { MODTEST,  "modtest",  CI_ORD1 },
    { MPI,      "mpi",      CI_BLOCK },
    { MPOLS,    "mpols",    CI_LIST },
    { NEGBIN,   "negbin",   CI_LIST },
    { NLS,      "nls",      CI_EXPR | CI_BLOCK },
    { NORMTEST, "normtest", CI_LIST | CI_LLEN1 },
    { NULLDATA, "nulldata", CI_ORD1 },
    { OLS,      "ols",      CI_LIST },
    { OMIT,     "omit",     CI_LIST },
    { OPEN,     "open",     CI_PARM1 | CI_FNAME | CI_INFL }, /* + ODBC specials */
    { ORTHDEV,  "orthdev",  CI_LIST | CI_NOOPT },
    { OUTFILE,  "outfile",  CI_PARM1 | CI_FNAME | CI_INFL },
    { PANEL,    "panel",    CI_LIST },
    { PANPLOT,  "panplot",  CI_LIST | CI_LLEN1 | CI_EXTRA },
    { PANSPEC,  "panspec",  0 },
    { PCA,      "pca",      CI_LIST | CI_DOALL },
    { PERGM,    "pergm",    CI_LIST | CI_LLEN1 | CI_ORD2 },
    { PLOT,     "plot",     CI_BLOCK | CI_PARM1 },
    { POISSON,  "poisson",  CI_LIST },
    { PRINT,    "print",    CI_INFL }, /* special: handled later */
    { PRINTF,   "printf",   CI_PARM1 | CI_VARGS },
    { PROBIT,   "probit",   CI_LIST },
    { PVAL,     "pvalue",   CI_ADHOC | CI_NOOPT },
    { QUANTREG, "quantreg", CI_PARM1 | CI_LIST },
    { QLRTEST,  "qlrtest",  0 },
    { QQPLOT,   "qqplot",   CI_LIST },
    { QUIT,     "quit",     CI_NOOPT },
    { RENAME,   "rename",   CI_PARM1 | CI_PARM2 },
    { RESET,    "reset",    0 },
    { RESTRICT, "restrict", CI_PARM1 | CI_BLOCK },
    { RMPLOT,   "rmplot",   CI_LIST | CI_LLEN1 },
    { RUN,      "run",      CI_PARM1 | CI_FNAME | CI_NOOPT },
    { RUNS,     "runs",     CI_LIST | CI_LLEN1 },
    { SCATTERS, "scatters", CI_LIST },
    { SDIFF,    "sdiff",    CI_LIST | CI_NOOPT },
    { SET,      "set",      CI_PARM1 | CI_PARM2 | CI_INFL },
    { SETINFO,  "setinfo",  CI_LIST | CI_LLEN1 | CI_INFL }, /* + special: handled later */
    { SETOBS,   "setobs",   CI_PARM1 | CI_PARM2 },
    { SETOPT,   "setopt",   CI_PARM1 | CI_PARM2 },
    { SETMISS,  "setmiss",  CI_PARM1 | CI_LIST | CI_DOALL },
    { SHELL,    "shell",    CI_EXPR },
    { SMPL,     "smpl",     CI_PARM1 | CI_PARM2 | CI_INFL }, /* alternate forms */
    { SPEARMAN, "spearman", CI_LIST | CI_LLEN2 },
    { SPRINTF,  "sprintf",  CI_PARM1 | CI_PARM2 | CI_VARGS },
    { SQUARE,   "square",   CI_LIST },
    { SSCANF,   "sscanf",   CI_EXPR },
    { STDIZE,   "stdize",   CI_LIST },
    { STORE,    "store",    CI_PARM1 | CI_FNAME | CI_LIST | CI_DOALL },
    { SUMMARY,  "summary",  CI_LIST | CI_DOALL },
    { SYSTEM,   "system",   CI_PARM1 | CI_BLOCK },
    { TABPRINT, "tabprint", 0 }, /* special, handled later */
    { TEXTPLOT, "textplot", CI_LIST },
    { TOBIT,    "tobit",    CI_LIST },
    { IVREG,    "tsls",     CI_LIST },
    { VAR,      "var",      CI_ORD1 | CI_LIST },
    { VARLIST,  "varlist",  0 },
    { VARTEST,  "vartest",  CI_LIST | CI_LLEN2 | CI_NOOPT },
    { VECM,     "vecm",     CI_ORD1 | CI_LIST },
    { VIF,      "vif",      0 },
    { WLS,      "wls",      CI_LIST },
    { XCORRGM,  "xcorrgm",  CI_LIST | CI_LLEN2 | CI_ORD2 },
    { XTAB,     "xtab",     CI_LIST | CI_INFL },
    { FUNDEBUG, "debug",    CI_PARM1 },
    { FUNCRET,  "return",   CI_EXPR },
    { CATCH,    "catch",    0 },
    { PKG,      "pkg",      CI_PARM1 | CI_PARM2 },
    { NC,       NULL,       0 }
};
225 
/* Command-classification macros. Every use of a macro parameter is
   parenthesized so that arbitrary expressions can be passed safely;
   note, however, that most of these evaluate their argument more
   than once, so arguments should be free of side effects.
*/

/* commands that cannot be prefixed with "catch" */
#define not_catchable(c) ((c) == IF || (c) == ENDIF || (c) == ELIF || \
			  (c) == FUNC)

/* commands for which the first parameter may be omitted */
#define param_optional(c) ((c) == SET || (c) == HELP || (c) == RESTRICT || \
			   (c) == SMPL || (c) == SYSTEM || (c) == FUNCERR || \
			   (c) == GRAPHPG || (c) == PLOT || (c) == OUTFILE)

/* commands for which the second parameter may be omitted */
#define parm2_optional(c) ((c) == SET || (c) == SETOPT || (c) == SETOBS || \
			   (c) == ESTIMATE || (c) == HELP || (c) == GRAPHPG || \
			   (c) == EQUATION || (c) == MODPRINT)

/* commands for which the trailing varargs field may be omitted */
#define vargs_optional(c) ((c) == PRINTF || (c) == SPRINTF)

#define expr_keep_cmdword(c) ((c) == GMM || (c) == MLE || (c) == NLS)

/* here @c is a command index, used to subscript the command table */
#define has_function_form(c) (gretl_cmds[(c)].flags & CI_FFORM)

/* in the following two macros @c is a pointer to CMD */
#define option_inflected(c) ((c)->ciflags & CI_INFL)

#define simple_flow_control(c) ((c)->ciflags & CI_FCMIN)
246 
command_get_flags(int ci)247 static int command_get_flags (int ci)
248 {
249     if (ci >= 0 && ci < NC) {
250 	return gretl_cmds[ci].flags;
251     } else {
252 	return 0;
253     }
254 }
255 
/* Returns non-zero if the flags recorded on @c include any of
   CI_EXPR, CI_VARGS or CI_NOOPT, otherwise zero. The non-zero
   value is the set of matching flag bits, not necessarily 1.
*/

static int never_takes_options (CMD *c)
{
    const int no_option_mask = CI_EXPR | CI_VARGS | CI_NOOPT;

    return c->ciflags & no_option_mask;
}
260 
check_for_shadowed_commands(void)261 static void check_for_shadowed_commands (void)
262 {
263     int i;
264 
265     for (i=1; i<NC; i++) {
266 	if (function_lookup(gretl_cmds[i].cword)) {
267 	    gretl_cmds[i].flags |= CI_FFORM;
268 	}
269     }
270 }
271 
272 /* Get the maximum number of (semicolon) separators
273    supported by the command with index @ci. For most
274    commands this is zero. If @pmin is non-NULL it
275    gets the minimum number of such separators needed
276    by the command.
277 */
278 
get_sep_max(int ci,int * pmin)279 static int get_sep_max (int ci, int *pmin)
280 {
281     /* default: semicolon separator neither required nor allowed */
282     int minsep = 0, maxsep = 0;
283 
284     switch (ci) {
285     case AR:
286     case GARCH:
287     case HECKIT:
288     case IVREG:
289     case MIDASREG:
290 	minsep = maxsep = 1;
291 	break;
292     case DPANEL:
293     case ARMA:
294 	minsep = 1;
295 	maxsep = 2;
296 	break;
297     case COINT2:
298     case VECM:
299 	maxsep = 2;
300 	break;
301     case BIPROBIT:
302     case DURATION:
303     case EQUATION:
304     case MPOLS:
305     case NEGBIN:
306     case POISSON:
307     case VAR:
308     case XTAB:
309     case LAGS:
310     case SCATTERS:
311     case HFPLOT:
312     case SMPL:
313 	maxsep = 1;
314 	break;
315     default:
316 	break;
317     }
318 
319     if (pmin != NULL) {
320 	*pmin = minsep;
321     }
322 
323     return maxsep;
324 }
325 
/* Flags that can be recorded on a token (bitwise OR-able).

   The difference between TOK_JOINED and TOK_NOGAP below is that we
   don't count a comma with no gap to its left as JOINED, since it's
   generally punctuation, but we do record it as NOGAP, since this
   info can be relevant when reconstructing an option parameter.

   Note: this is a typedef, so that TokenFlags names a type; without
   the typedef keyword the declaration would instead define a
   file-scope variable called TokenFlags.
*/

typedef enum {
    TOK_JOINED = 1 << 0, /* token is joined on the left (no space) */
    TOK_NOGAP  = 1 << 1, /* as TOK_JOINED but including joined comma */
    TOK_DONE   = 1 << 2, /* token has been handled */
    TOK_QUOTED = 1 << 3, /* token was found in double quotes */
    TOK_IGNORE = 1 << 4, /* token not actually wanted, ignored */
    TOK_LSTR   = 1 << 5  /* token provisionally added to list string */
} TokenFlags;
340 
/* The type assigned to a token. This is a typedef, so that
   TokenTypes names a type; without the typedef keyword the
   declaration would instead define a file-scope variable called
   TokenTypes.
*/

typedef enum {
    TOK_NAME,    /* potentially valid identifier (not quoted) */
    TOK_DOLSTR,  /* '$' plus potentially valid identifier */
    TOK_OPT,     /* long-form option flag */
    TOK_SOPT,    /* short-form option flag */
    TOK_CATCH,   /* "catch" keyword */
    TOK_STRING,  /* string in double quotes */
    TOK_NUMBER,  /* numeric, plus 'colonized' dates such as 1990:1 */
    TOK_INT,     /* integer (may start with '+' or '-') */
    TOK_DASH,    /* single dash */
    TOK_DDASH,   /* double dash */
    TOK_DPLUS,   /* double plus */
    TOK_EQUALS,  /* equals sign by itself */
    TOK_EQMOD,   /* "+=", "-=", etc. */
    TOK_ASSIGN,  /* assignment to object, "<-" */
    TOK_SEMIC,   /* semicolon */
    TOK_COLON,   /* colon */
    TOK_DOT,     /* single dot */
    TOK_COMMA,   /* single comma */
    TOK_DDOT,    /* double dot */
    TOK_PRSTR,   /* string in parentheses */
    TOK_BRSTR,   /* string in square brackets */
    TOK_CBSTR,   /* string in curly braces */
    TOK_OPTDASH, /* dash preceding an option flag */
    TOK_OPTEQ,   /* '=' that joins option flag to value */
    TOK_OPTVAL,  /* value attached to option flag */
    TOK_AST,     /* single asterisk */
    TOK_SYMB,    /* symbols, not otherwise handled */
    TOK_EVAL     /* string that needs to be eval'd */
} TokenTypes;
371 
/* A single token taken from a command line; the members are
   filled in by real_add_token() and reset by cmd_token_init() */
struct cmd_token_ {
    char *s;         /* allocated token string */
    const char *lp;  /* pointer to line position */
    guint8 type;     /* one of TokenTypes */
    guint8 flag;     /* zero or more of TokenFlags */
};
378 
/* Token-inspection macros. Every use of a macro parameter is
   parenthesized so that expressions such as &c->toks[i] can be
   passed safely; several of these evaluate their argument more
   than once, so arguments should be free of side effects.
*/

/* these take a pointer to cmd_token */
#define token_joined(t)  ((t)->flag & TOK_JOINED)
#define token_done(t)    ((t)->flag & TOK_DONE)
#define token_ignored(t) ((t)->flag & TOK_IGNORE)

/* these take a cmd_token lvalue (not a pointer) */
#define mark_token_done(t) ((t).flag |= TOK_DONE)
#define mark_list_token_done(t) ((t).flag |= (TOK_DONE|TOK_LSTR))
#define mark_token_ignored(t) ((t).flag |= (TOK_DONE|TOK_IGNORE))

/* these take a token type code */
#define option_type(t) ((t) == TOK_OPT || (t) == TOK_SOPT || \
			(t) == TOK_OPTDASH || (t) == TOK_OPTEQ || \
			(t) == TOK_OPTVAL)

#define delimited_type(t) ((t) == TOK_STRING || \
			   (t) == TOK_PRSTR ||  \
			   (t) == TOK_CBSTR ||  \
			   (t) == TOK_BRSTR)

#define bracketed_type(t) ((t) == TOK_PRSTR || (t) == TOK_BRSTR)

/* takes a pointer to cmd_token: is it '*' or a '?' symbol? */
#define wildsym(t) ((t)->type == TOK_AST || \
		    ((t)->type == TOK_SYMB && (t)->s[0] == '?'))
400 
/* Put the token @t into its pristine state: no string, no line
   position, type and flag both zero. Nothing is freed here.
*/

static void cmd_token_init (cmd_token *t)
{
    t->flag = 0;
    t->type = 0;
    t->lp = NULL;
    t->s = NULL;
}
408 
/* Free the string held by token @t and return the token to its
   initial state. A NULL @t is accepted and ignored.
*/

static void cmd_token_clear (cmd_token *t)
{
    if (t == NULL) {
	return;
    }

    free(t->s);
    cmd_token_init(t);
}
416 
/* Free all storage owned by @cmd (token strings, the token array,
   @param, @parm2, @list and @auxlist), but not the CMD struct
   itself. A NULL @cmd is accepted and ignored. On return the
   freed members are reset to NULL/0 so that no dangling pointers
   remain and a repeated call is harmless.
*/

void gretl_cmd_free (CMD *cmd)
{
    int i;

    if (cmd == NULL) {
	return;
    }

    if (cmd->toks != NULL) {
	for (i=0; i<cmd->ntoks; i++) {
	    char *s = cmd->toks[i].s;

	    /* a token string may also be reachable via @param or
	       @parm2: in that case it's freed once only, below */
	    if (s != cmd->param && s != cmd->parm2) {
		free(s);
	    }
	}
	free(cmd->toks);
    }

    free(cmd->list);
    free(cmd->param);
    free(cmd->parm2);
    free(cmd->auxlist);

    cmd->toks = NULL;
    cmd->ntoks = 0;
    cmd->nt_alloced = 0;
    cmd->list = NULL;
    cmd->param = NULL;
    cmd->parm2 = NULL;
    cmd->auxlist = NULL;
}
436 
/* Free the content of @cmd and then the CMD struct itself; for
   use with a CMD obtained via gretl_cmd_new(). As with free(),
   a NULL argument is accepted and ignored.
*/

void gretl_cmd_destroy (CMD *cmd)
{
    if (cmd != NULL) {
	gretl_cmd_free(cmd);
	free(cmd);
    }
}
442 
gretl_cmd_new(void)443 CMD *gretl_cmd_new (void)
444 {
445     CMD *cmd = malloc(sizeof *cmd);
446 
447     if (cmd != NULL) {
448 	gretl_cmd_init(cmd);
449     }
450 
451     return cmd;
452 }
453 
/* Initialize the CMD struct @c: all scalar members are zeroed,
   all pointer members are set to NULL, and an initial array of
   16 token slots is allocated.

   Returns 0 on success or E_ALLOC if the token array cannot be
   allocated. On failure @c itself is left untouched by free():
   it belongs to the caller (it may not even be heap-allocated),
   and its @toks member is NULL with @nt_alloced equal to 0.
*/

int gretl_cmd_init (CMD *c)
{
    const int n = 16; /* initial number of token slots */
    int i;

    c->ci = 0;
    c->err = 0;
    c->context = 0;
    c->ciflags = 0;
    c->opt = 0;
    c->flags = 0;
    c->order = 0;
    c->auxint = 0;
    c->cstart = 0;
    c->ntoks = 0;
    c->nt_alloced = 0;
    c->vstart = NULL;
    c->param = NULL;
    c->parm2 = NULL;
    c->list = NULL;
    c->auxlist = NULL;

    *c->savename = '\0';
    c->gtype = GRETL_TYPE_ANY;

    c->toks = malloc(n * sizeof *c->toks);
    if (c->toks == NULL) {
	return E_ALLOC;
    }

    for (i=0; i<n; i++) {
	cmd_token_init(&c->toks[i]);
    }

    c->nt_alloced = n;

    return 0;
}
497 
/* Reset @c so it can be re-used for the next command line: the
   token strings, @param, @parm2, @list and @auxlist are freed and
   the per-command members are zeroed. The token array itself
   (@toks, @nt_alloced) is retained. The members @context and
   @savename, and the CMD_CATCH flag, are handled specially: see
   the note in the body.
*/

static void gretl_cmd_clear (CMD *c)
{
    cmd_token *tok;
    int i, ci = c->ci;

#if CDEBUG
    fprintf(stderr, "gretl_cmd_clear: ci = %d (%s), context = %d\n\n", ci,
	    gretl_command_word(ci), c->context);
#endif

    /* for "end <word>", switch to the command index obtained from
       the parameter (presumably the block being ended); this must
       be done before @param is freed below
    */
    if (ci == END && c->param != NULL) {
	ci = gretl_command_number(c->param);
    }

    for (i=0; i<c->ntoks; i++) {
	tok = &c->toks[i];
	if (tok->s == c->param) {
	    /* avoid double-freeing */
	    c->param = NULL;
	} else if (tok->s == c->parm2) {
	    /* likewise: the string is freed via the token */
	    c->parm2 = NULL;
	}
	cmd_token_clear(tok);
    }

    /* FIXME: do the next step only if the CMD has actually
       been parsed/assembled, not just tokenized (as in
       get_command_index)?
    */

    if (c->context > 0) {
	; /* don't clear any pending options */
    } else if (ci > 0 && ci != SETOPT &&
	!(c->ciflags & (CI_NOOPT | CI_EXPR | CI_VARGS))) {
	/* skipped for commands flagged as never taking options */
	clear_stored_options_for_command(ci);
    }

    /* note: c->flags and c->context are not reset here */
    c->ci = 0;
    c->err = 0;
    c->ciflags = 0;
    c->opt = 0;
    c->order = 0;
    c->auxint = 0;
    c->cstart = 0;
    c->ntoks = 0;
    c->vstart = NULL;
    c->gtype = GRETL_TYPE_ANY;

    /* Note: c->context, c->savename and the flag CMD_CATCH should
       persist across in-block commands until end-of-block is
       reached. But once we've exited a block (which is signaled by
       context == 0), these elements should be cleared.
       FIXME: same issue as above?
    */

    if (c->context == 0) {
	*c->savename = '\0';
	c->flags &= ~CMD_CATCH;
    }

    /* any @param or @parm2 that was shared with a token string
       was set to NULL in the loop above, so these calls cannot
       free the same memory twice
    */
    free(c->param);
    free(c->parm2);
    c->param = c->parm2 = NULL;

    free(c->list);
    free(c->auxlist);
    c->list = c->auxlist = NULL;
}
566 
/* Append a token to the array on @c, growing the array if need be.

   @tok: the token text (copied).
   @lp: position in the command line at which the token was found.
   @type: token type code.
   @flag: token flags.

   Returns 0 on success, E_ALLOC if memory cannot be obtained, in
   which case the token count on @c is unchanged.
*/

static int real_add_token (CMD *c, const char *tok,
			   const char *lp, char type,
			   char flag)
{
    int n = c->ntoks;
    char *s;

#if TDEBUG
    fprintf(stderr, "real_add_token: '%s'\n", tok);
#endif

    if (n == c->nt_alloced - 1) {
	/* only one free slot remains: double the array */
	int i, nt_new = c->nt_alloced * 2;
	cmd_token *toks;

	toks = realloc(c->toks, nt_new * sizeof *toks);
	if (toks == NULL) {
	    /* c->toks is still valid and still owned by @c */
	    return E_ALLOC;
	}

	for (i=c->nt_alloced; i<nt_new; i++) {
	    cmd_token_init(&toks[i]);
	}
	c->toks = toks;
	c->nt_alloced = nt_new;
    }

    /* NOTE(review): gretl_strdup() is presumed to return NULL on
       allocation failure, like strdup(); a NULL string must not be
       recorded as a valid token since later code applies strlen()
       to token strings.
    */
    s = gretl_strdup(tok);
    if (s == NULL) {
	return E_ALLOC;
    }

    c->toks[n].s = s;
    c->toks[n].lp = lp;
    c->toks[n].type = type;
    c->toks[n].flag = flag;
    c->ntoks += 1;

    return 0;
}
606 
/* Add a token to @c, first working out whether it is attached to
   whatever precedes it in the command line.

   @tok: the token text.
   @s: pointer to the token's position in the command line.
   @pos: offset of @s from the start of the line; if this is
   greater than zero the byte before @s is inspected.
   @type, @flag: token type and initial flags.

   If the preceding byte is not white space the token gets the
   TOK_NOGAP flag and, unless it is a comma, TOK_JOINED as well.
   Returns the result from real_add_token().
*/

static int push_token (CMD *c, const char *tok, const char *s,
		       int pos, char type, char flag)
{
    /* The cast is needed: handing isspace() a negative value (as
       may arise from a byte of a multibyte character where plain
       char is signed) is undefined behavior.
    */
    if (pos > 0 && !isspace((unsigned char) *(s-1))) {
	flag |= TOK_NOGAP;
	if (type != TOK_COMMA) {
	    flag |= TOK_JOINED;
	}
    }

    return real_add_token(c, tok, s, type, flag);
}
619 
/* Classify and add a name-like token. The possibilities are:
   "catch" in first position (TOK_CATCH); a string starting with
   '$' (TOK_DOLSTR); a well-formed "eval(...)" term, in which case
   only the content of the parentheses is stored (TOK_EVAL); or,
   failing all of these, a plain TOK_NAME.
*/

static int push_string_token (CMD *c, const char *tok,
			      const char *s, int pos)
{
    if (c->ntoks == 0 && strcmp(tok, "catch") == 0) {
	return push_token(c, tok, s, pos, TOK_CATCH, 0);
    }

    if (*tok == '$') {
	return push_token(c, tok, s, pos, TOK_DOLSTR, 0);
    }

    if (strncmp(tok, "eval(", 5) == 0) {
	char strvar[VNAMELEN];
	int got_name = sscanf(tok, "eval(%31[^)]", strvar) == 1;

	/* the term must also end with a closing parenthesis */
	if (got_name && tok[strlen(tok)-1] == ')') {
	    return push_token(c, strvar, s, pos, TOK_EVAL, 0);
	}
    }

    return push_token(c, tok, s, pos, TOK_NAME, 0);
}
640 
/* Classify and add a token composed of symbol characters. A
   semicolon is rejected with E_PARSE if the command index on @c
   is already known and that command accepts no separators.
*/

static int push_symbol_token (CMD *c, const char *tok,
			      char *s, int pos)
{
    /* symbols that are identified by exact match */
    static const struct {
	const char *sym;
	char type;
    } fixed[] = {
	{ "-",  TOK_DASH },
	{ ".",  TOK_DOT },
	{ ",",  TOK_COMMA },
	{ ";",  TOK_SEMIC },
	{ ":",  TOK_COLON },
	{ "--", TOK_DDASH },
	{ "++", TOK_DPLUS },
	{ "..", TOK_DDOT },
	{ "=",  TOK_EQUALS },
	{ "*",  TOK_AST }
    };
    const int nfixed = sizeof fixed / sizeof fixed[0];
    char type = TOK_SYMB;
    int i;

    for (i=0; i<nfixed; i++) {
	if (strcmp(tok, fixed[i].sym) == 0) {
	    type = fixed[i].type;
	    break;
	}
    }

    if (i == nfixed) {
	/* no exact match: try the pattern-based cases */
	if (strlen(tok) == 2 && tok[1] == '=') {
	    type = TOK_EQMOD;
	} else if (c->ntoks == 1 && strcmp(tok, "<-") == 0) {
	    /* FIXME allow for "catch" */
	    type = TOK_ASSIGN;
	}
    }

    if (type == TOK_SEMIC && c->ci > 0 &&
	get_sep_max(c->ci, NULL) == 0) {
	gretl_errmsg_sprintf(_("The symbol '%c' is not valid in this context\n"),
			     ';');
	return E_PARSE;
    }

    return push_token(c, tok, s, pos, type, 0);
}
683 
/* Returns the length of the initial portion of @s that is
   composed entirely of the decimal digits 0 to 9.
*/

static unsigned int digit_spn (const char *s)
{
    unsigned int n = 0;

    while (s[n] >= '0' && s[n] <= '9') {
	n++;
    }

    return n;
}
690 
/* Add a numeric token: its type is TOK_INT if, after an optional
   leading '+' or '-', the text consists of nothing but decimal
   digits, otherwise TOK_NUMBER.
*/

static int push_numeric_token (CMD *c, const char *tok,
			       const char *s, int pos)
{
    /* step past a single leading sign, if present */
    const char *body = (*tok == '+' || *tok == '-') ? tok + 1 : tok;
    int all_digits = (strlen(body) == digit_spn(body));

    return push_token(c, tok, s, pos,
		      all_digits ? TOK_INT : TOK_NUMBER, 0);
}
707 
/* is @c an opening delimiter? (the argument is evaluated more
   than once, so it should be free of side effects) */
#define ldelim(c) ((c) == '(' || (c) == '{' || (c) == '[')
709 
/* Add a token that was found within delimiters. The type is
   selected by the character at @s (the opening delimiter): '{'
   gives TOK_CBSTR, '[' gives TOK_BRSTR, '"' gives TOK_STRING and
   anything else gives TOK_PRSTR.
*/

static int push_delimited_token (CMD *c, const char *tok,
				 const char *s, int pos)
{
    char type;

    switch (*s) {
    case '{':
	type = TOK_CBSTR;
	break;
    case '[':
	type = TOK_BRSTR;
	break;
    case '"':
	type = TOK_STRING;
	break;
    default:
	type = TOK_PRSTR;
	break;
    }

    return push_token(c, tok, s, pos, type, 0);
}
725 
/* Add a token of type TOK_STRING, flagged TOK_QUOTED, holding the
   content of a double-quoted string.

   @s: pointer to the opening quote in the command line.
   @len: number of bytes between the opening and closing quotes.
   @pos: offset of @s from the start of the line.

   For the "print" command the content is copied verbatim; for
   "printf" and "sprintf" it is copied up to the first unescaped
   quote without any modification of escapes; for other commands
   each backslash-quote pair is replaced by a plain quote.

   The copy never exceeds @len bytes and is always NUL-terminated,
   even if the closing quote is not where @len says it should be.
   Returns 0 on success, E_PARSE if @len is negative, E_ALLOC on
   allocation failure, or an error code from push_token().
*/

static int push_quoted_token (CMD *c, const char *s,
			      int len, int pos)
{
    const char *p = s + 1; /* first byte after the opening quote */
    char *tok;
    int i = 0;
    int err;

    if (len < 0) {
	/* no closing quote was located */
	return E_PARSE;
    }

    tok = malloc(len + 1);
    if (tok == NULL) {
	return E_ALLOC;
    }

    if (c->ci == PRINT) {
	*tok = '\0';
	strncat(tok, p, len);
    } else if (c->ci == PRINTF || c->ci == SPRINTF) {
	/* format strings: don't mess! */
	while (*p != '\0' && i < len) {
	    if (*p == '"' && *(p-1) != '\\') {
		break;
	    }
	    tok[i++] = *p++;
	}
	tok[i] = '\0';
    } else {
	/* unescape escaped quotes */
	while (*p != '\0' && i < len) {
	    if (*p == '\\' && *(p+1) == '"') {
		tok[i++] = '"';
		p += 2;
	    } else if (*p == '"') {
		break;
	    } else {
		tok[i++] = *p++;
	    }
	}
	tok[i] = '\0';
    }

    /* push_token() stores a copy, so our buffer can go */
    err = push_token(c, tok, s, pos, TOK_STRING, TOK_QUOTED);
    free(tok);

    return err;
}
773 
/* Returns the length of the run of symbol characters at the start
   of @s. A single '=' (one that is not followed by a second '=')
   always counts as a run of length 1, whatever follows it.
*/

static int symbol_spn (const char *s)
{
    if (s[0] != '=' || s[1] == '=') {
	return strspn(s, "=+-/*<>?|~^!%&.,:;\\'");
    }

    /* lone equals sign */
    return 1;
}
784 
/* Returns the length of the numeric token at the start of @s, as
   judged by strtod(), with two adjustments. First, if the number
   ends with a dot that is immediately followed by another dot,
   the trailing dot is not counted (it belongs to a ".." token).
   Second, if @digstart is non-zero then a colon followed by one
   or more digits is counted as part of the token, so observation
   identifiers such as "1995:04" are treated as numeric.
*/

static int numeric_spn (const char *s, int digstart)
{
    char *end = NULL;
    int len;

    strtod(s, &end);
    len = end - s;

    if (len > 1 && end[-1] == '.' && end[0] == '.') {
	/* trailing double-dot */
	return len - 1;
    }

    if (digstart && *end == ':') {
	int ndigits = digit_spn(end + 1);

	if (ndigits > 0) {
	    /* take the colon plus the digits */
	    len += ndigits + 1;
	}
    }

    return len;
}
813 
/* Returns the length of the initial portion of @s that consists
   of characters valid in an identifier: the unaccented Latin
   letters, the decimal digits and the underscore.
*/

static int namechar_spn (const char *s)
{
    static const char ok[] = "abcdefghijklmnopqrstuvwxyz"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"0123456789_";
    int n = 0;

    /* the explicit check for NUL is needed since strchr() would
       "find" the terminator of @ok */
    while (s[n] != '\0' && strchr(ok, s[n]) != NULL) {
	n++;
    }

    return n;
}
822 
/* Returns the length of the initial portion of @s that consists
   of identifier characters (unaccented Latin letters, decimal
   digits, underscore) and/or the wildcard character '*'.
*/

static int wild_spn (const char *s)
{
    static const char ok[] = "abcdefghijklmnopqrstuvwxyz"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"0123456789_*";
    int n = 0;

    /* the explicit check for NUL is needed since strchr() would
       "find" the terminator of @ok */
    while (s[n] != '\0' && strchr(ok, s[n]) != NULL) {
	n++;
    }

    return n;
}
831 
closing_quote_pos(const char * s,int ci)832 static int closing_quote_pos (const char *s, int ci)
833 {
834     int n = 0;
835 
836     s++;
837     while (*s) {
838 	if (ci == PRINT && *s == '"') {
839 	    return n;
840 	}
841 	if (*s == '"' && *(s-1) != '\\') {
842 	    return n;
843 	}
844 	s++;
845 	n++;
846     }
847 
848     return -1;
849 }
850 
/* Returns the closing delimiter that pairs with the opening
   delimiter @ltype: ')' for '(', '}' for '{', and ']' for
   anything else.
*/

static int matching_delim (int ltype)
{
    switch (ltype) {
    case '(':
	return ')';
    case '{':
	return '}';
    default:
	return ']';
    }
}
861 
/* Given @s, which points at an opening delimiter, returns the
   number of bytes between it and the matching closing delimiter,
   or -1 if there is no match. Nested delimiters of the same kind
   are respected, and anything between double quotes is skipped.
*/

static int closing_delimiter_pos (const char *s)
{
    const int opener = *s;
    const int closer = matching_delim(opener);
    int in_quotes = 0;
    int depth = 1;
    int i;

    /* s[0] is the opening delimiter: start looking at s[1] */
    for (i=1; s[i] != '\0'; i++) {
	if (s[i] == '"') {
	    in_quotes = !in_quotes;
	    continue;
	}
	if (in_quotes) {
	    continue;
	}
	if (s[i] == opener) {
	    depth++;
	} else if (s[i] == closer) {
	    depth--;
	    if (depth == 0) {
		return i - 1;
	    }
	}
    }

    return -1;
}
889 
890 /* Determine the index of the first 'real command' token, beyond
891    "catch" or assignment to an object.
892 */
893 
min_token_index(CMD * c,int compmode)894 static int min_token_index (CMD *c, int compmode)
895 {
896     int pos = 0, apos = 1;
897 
898     if (c->toks[0].type == TOK_CATCH) {
899 	/* advance everything by one place */
900 	pos = 1;
901 	apos = 2;
902     }
903 
904     if (c->ntoks > apos && c->toks[apos].type == TOK_ASSIGN) {
905 	/* advance by assignment target and operator */
906 	pos += 2;
907     } else if (compmode && c->ntoks == 2 && c->toks[0].s[0] == '@') {
908 	/* got a @-term in pos 0, not a command word */
909 	pos = 1;
910     }
911 
912     c->cstart = pos;
913 
914     return c->cstart;
915 }
916 
917 /* old-style "-%c param" settings, etc. */
918 
pseudo_option(const char * s,int ci)919 static int pseudo_option (const char *s, int ci)
920 {
921     if (ci == EQNPRINT || ci == TABPRINT) {
922 	if (!strcmp(s, "f")) {
923 	    return 1;
924 	}
925     } else if (ci == SETINFO) {
926 	if (!strcmp(s, "d") || !strcmp(s, "n")) {
927 	    return 1;
928 	}
929     } else if (ci == SMPL || ci == SET) {
930 	/* short opts confusable with params */
931 	return 1;
932     }
933 
934     return 0;
935 }
936 
937 /* Find the number of bytes a token takes up in
938    the input line, including delimiters if
939    applicable.
940 */
941 
real_toklen(cmd_token * tok)942 static int real_toklen (cmd_token *tok)
943 {
944     int n = strlen(tok->s);
945 
946     if (delimited_type(tok->type)) {
947 	n += 2;
948     }
949 
950     return n;
951 }
952 
953 /* Build a string composed of tokens k1 to k2. */
954 
fuse_tokens(CMD * c,int k1,int k2,int n)955 static char *fuse_tokens (CMD *c, int k1, int k2, int n)
956 {
957     char *ret = malloc(n);
958     int i;
959 
960     if (ret == NULL) {
961 	c->err = E_ALLOC;
962     } else {
963 	*ret = '\0';
964 	for (i=k1; i<=k2; i++) {
965 	    if (c->toks[i].type == TOK_BRSTR) {
966 		strcat(ret, "[");
967 		strcat(ret, c->toks[i].s);
968 		strcat(ret, "]");
969 	    } else if (c->toks[i].type == TOK_PRSTR) {
970 		strcat(ret, "(");
971 		strcat(ret, c->toks[i].s);
972 		strcat(ret, ")");
973 	    } else {
974 		strcat(ret, c->toks[i].s);
975 	    }
976 	    mark_token_done(c->toks[i]);
977 	}
978     }
979 
980     return ret;
981 }
982 
/* Merge tokens from position k1 rightward, for use with option
   material: the merger stops at the first token that has a gap to
   its left (lacks TOK_NOGAP) or that has already been handled.
   Returns the fused string, as produced by fuse_tokens().
*/

static char *merge_option_toks_l_to_r (CMD *c, int k1)
{
    /* the extra byte is for the NUL terminator */
    int len = real_toklen(&c->toks[k1]) + 1;
    int last = k1;
    int i;

    for (i=k1+1; i<c->ntoks; i++) {
	cmd_token *t = &c->toks[i];

	if (!(t->flag & TOK_NOGAP) || token_done(t)) {
	    break;
	}
	len += real_toklen(t);
	last = i;
    }

    return fuse_tokens(c, k1, last, len);
}
1001 
1002 /* Merge tokens from position k1 rightward, stopping at the
1003    first token that is not left-joined.
1004 */
1005 
merge_toks_l_to_r(CMD * c,int k1)1006 static char *merge_toks_l_to_r (CMD *c, int k1)
1007 {
1008     cmd_token *tok = &c->toks[k1];
1009     int n = real_toklen(tok) + 1;
1010     int i, k2 = k1;
1011 
1012     for (i=k1+1; i<c->ntoks; i++) {
1013 	tok = &c->toks[i];
1014 	if (token_joined(tok) && !token_done(tok)) {
1015 	    n += real_toklen(tok);
1016 	    k2 = i;
1017 	} else {
1018 	    break;
1019 	}
1020     }
1021 
1022     return fuse_tokens(c, k1, k2, n);
1023 }
1024 
1025 /* Merge tokens from position k2 leftward, stopping at the
1026    first token that is not left-joined.
1027 */
1028 
merge_toks_r_to_l(CMD * c,int k2)1029 static char *merge_toks_r_to_l (CMD *c, int k2)
1030 {
1031     cmd_token *prevtok, *tok = &c->toks[k2];
1032     int n = real_toklen(tok) + 1;
1033     int i, k1 = k2;
1034 
1035     for (i=k2; i>c->cstart+1; i--) {
1036 	tok = &c->toks[i];
1037 	prevtok = &c->toks[i-1];
1038 	if (token_joined(tok) && !token_done(prevtok)) {
1039 	    n += real_toklen(prevtok);
1040 	    k1 = i - 1;
1041 	} else {
1042 	    break;
1043 	}
1044     }
1045 
1046     return fuse_tokens(c, k1, k2, n);
1047 }
1048 
/* Mark short options (e.g. "-o") and long options (e.g. "--robust"),
   which may be followed by "=".  At this step we're just marking the
   tokens on a purely syntactical basis: token types are rewritten
   (to TOK_OPT, TOK_SOPT, TOK_OPTDASH, TOK_OPTEQ or TOK_OPTVAL) but
   no check is made here on whether an option is valid for the
   command in question. Only tokens carrying the TOK_NOGAP flag are
   examined, each in relation to the token immediately before it.
*/

static void mark_option_tokens (CMD *c)
{
    cmd_token *tok, *prevtok;
    int i;

    for (i=1; i<c->ntoks; i++) {
	tok = &c->toks[i];
	if (tok->flag & TOK_NOGAP) {
	    prevtok = &c->toks[i-1];
	    if (!token_joined(prevtok)) {
		/* the preceding token is not joined on its left:
		   a double dash introduces a long option, a single
		   dash a short option (unless pseudo_option() gives
		   a non-zero return for this string and command)
		*/
		if (prevtok->type == TOK_DDASH) {
		    tok->type = TOK_OPT;
		} else if (prevtok->type == TOK_DASH) {
		    if (!pseudo_option(tok->s, c->ci)) {
			tok->type = TOK_SOPT;
		    }
		}
		if (tok->type == TOK_OPT || tok->type == TOK_SOPT) {
		    /* the dash(es) have now served their purpose */
		    prevtok->type = TOK_OPTDASH;
		    prevtok->flag |= TOK_DONE;
		}
	    } else if (prevtok->type == TOK_OPT) {
		/* something glued onto a long option: either '='
		   or a further piece of the option string */
		if (tok->type == TOK_EQUALS) {
		    tok->type = TOK_OPTEQ;
		} else if (tok->type == TOK_DASH ||
			   tok->type == TOK_NAME ||
			   tok->type == TOK_INT) {
		    tok->type = TOK_OPT; /* continuation of option flag */
		}
	    } else if (prevtok->type == TOK_OPTEQ) {
		/* first piece of the value following "--opt=" */
		if (tok->type == TOK_NAME || tok->type == TOK_STRING) {
		    tok->type = TOK_OPTVAL;
		}
	    } else if (prevtok->type == TOK_OPTVAL) {
		/* continuation of an option value (name or dot) */
		if (tok->type == TOK_NAME || tok->type == TOK_DOT) {
		    tok->type = TOK_OPTVAL;
		}
	    }
	}
    }
}
1095 
next_joined_token(CMD * c,int i)1096 static cmd_token *next_joined_token (CMD *c, int i)
1097 {
1098     if (i >= 0 && i < c->ntoks - 1) {
1099 	if (c->toks[i+1].flag & TOK_JOINED) {
1100 	    return &c->toks[i+1];
1101 	}
1102     }
1103 
1104     return NULL;
1105 }
1106 
next_nogap_token(CMD * c,int i)1107 static cmd_token *next_nogap_token (CMD *c, int i)
1108 {
1109     if (i >= 0 && i < c->ntoks - 1) {
1110 	if (c->toks[i+1].flag & TOK_NOGAP) {
1111 	    return &c->toks[i+1];
1112 	}
1113     }
1114 
1115     return NULL;
1116 }
1117 
/* Handle the (possible) value attached to the long option whose
   last flag token is at position @i. @opt is the option flag and
   @status says whether the option takes a parameter. If the option
   is followed by a joined '=' token, the value is taken from what
   follows the '=' and handed to push_option_param(). Sets and
   returns c->err: E_PARSE on malformed syntax, E_BADOPT if a
   required parameter is missing, E_ALLOC if no value string could
   be obtained.
*/

static int handle_option_value (CMD *c, int i, gretlopt opt,
				OptStatus status)
{
    cmd_token *tok = &c->toks[i];
    cmd_token *nexttok = next_joined_token(c, i);
    int getval = 0;

    if (nexttok != NULL) {
	if (status == OPT_NO_PARM) {
	    /* nothing should be glued onto the option flag */
	    c->err = E_PARSE;
	} else if (nexttok->type == TOK_OPTEQ) {
	    nexttok->flag |= TOK_DONE;
	    getval = 1;
	} else {
	    /* the only acceptable following field is '=' */
	    c->err = E_PARSE;
	}
    }

    if (status == OPT_NEEDS_PARM && !getval && !c->err) {
	/* we need but didn't get a value */
	gretl_errmsg_sprintf(_("The option '--%s' requires a parameter"),
			     tok->s);
	c->err = E_BADOPT;
    }

    if (getval) {
	char *val = NULL;

	/* the value should start right after the '=' at i + 1 */
	nexttok = next_nogap_token(c, i + 1);
	if (nexttok == NULL) {
	    c->err = E_PARSE;
	} else if (delimited_type(nexttok->type)) {
	    /* delimited value: take a copy of the token's string */
	    val = gretl_strdup(nexttok->s);
	    nexttok->flag |= TOK_DONE;
	} else {
	    /* the value may be spread across several tokens */
	    val = merge_option_toks_l_to_r(c, i + 2);
	}

#if CDEBUG > 1
	fprintf(stderr, "option '--%s': param='%s'\n", tok->s,
		(val == NULL)? "NULL" : val);
#endif

	if (val != NULL) {
	    c->err = push_option_param(c->ci, opt, val);
	    if (c->err) {
		/* value was not accepted, so free it here */
		free(val);
	    }
	} else if (!c->err) {
	    /* no value string and no error recorded so far */
	    c->err = E_ALLOC;
	}
    }

    return c->err;
}
1175 
1176 #define OPTLEN 32
1177 
assemble_option_flag(CMD * c,cmd_token * tok,char * flag,int * pk,int dryrun)1178 static int assemble_option_flag (CMD *c, cmd_token *tok,
1179 				 char *flag, int *pk,
1180 				 int dryrun)
1181 {
1182     int i, n = 0, added = 0;
1183 
1184     *flag = '\0';
1185 
1186     for (i=*pk; i<c->ntoks; i++) {
1187 	tok = &c->toks[i];
1188 	if (tok->type == TOK_OPT) {
1189 	    n += strlen(tok->s);
1190 	    if (n >= OPTLEN) {
1191 		fprintf(stderr, "option string too long\n");
1192 		c->err = E_PARSE;
1193 		break;
1194 	    } else {
1195 		if (!dryrun) {
1196 		    tok->flag |= TOK_DONE;
1197 		}
1198 		strcat(flag, tok->s);
1199 		if (added) {
1200 		    *pk += 1;
1201 		} else {
1202 		    added = 1;
1203 		}
1204 	    }
1205 	} else {
1206 	    break;
1207 	}
1208     }
1209 
1210 #if CDEBUG > 2
1211     fprintf(stderr, "option flag: '%s'\n", flag);
1212 #endif
1213 
1214     return c->err;
1215 }
1216 
1217 /* For now, handle the old fit-type options whose strings
1218    are listed below. If would be nice to get rid of this
1219    altogether (in favour of the newer --fit=whatever
1220    option).
1221 */
1222 
handle_legacy_gnuplot_options(CMD * c)1223 static void handle_legacy_gnuplot_options (CMD *c)
1224 {
1225     /* these used to be options in their own right */
1226     const char *old_opts[] = {
1227 	"inverse-fit",
1228 	"loess-fit",
1229 	"quadratic-fit",
1230 	"linear-fit",
1231 	"semilog-fit",
1232 	"suppress-fitted"
1233     };
1234     /* but are replaced by these flags for --fit */
1235     const char *repl[] = {
1236 	"inverse",
1237 	"loess",
1238 	"quadratic",
1239 	"linear",
1240 	"semilog",
1241 	"none"
1242     };
1243     char optflag[OPTLEN];
1244     cmd_token *tok;
1245     int i, j, n, pos, len;
1246     int err, done = 0;
1247 
1248     n = G_N_ELEMENTS(old_opts);
1249 
1250     for (i=1; i<c->ntoks && !done; i++) {
1251 	tok = &c->toks[i];
1252 	if (token_done(tok)) {
1253 	    continue;
1254 	}
1255 	if (tok->type == TOK_OPT) {
1256 	    pos = i;
1257 	    err = assemble_option_flag(c, tok, optflag, &i, 1);
1258 	    if (err) {
1259 		break;
1260 	    }
1261 	    for (j=0; j<n && !done; j++) {
1262 		len = strlen(optflag);
1263 		if (len > 2 && !strncmp(optflag, old_opts[j], len)) {
1264 		    /* found an obsolete fit option */
1265 		    c->opt |= OPT_F;
1266 		    set_optval_string(GNUPLOT, OPT_F, repl[j]);
1267 		    for (j=pos; j<=i; j++) {
1268 			c->toks[j].flag |= TOK_DONE;
1269 		    }
1270 		    done = 1;
1271 		}
1272 	    }
1273 	}
1274     }
1275 }
1276 
1277 /* handle --remove and --purge, which used to be options
1278    to the defunct "install" command
1279 */
1280 
handle_legacy_install_options(CMD * c)1281 static void handle_legacy_install_options (CMD *c)
1282 {
1283     char optflag[OPTLEN];
1284     cmd_token *tok;
1285     int i, j, pos;
1286     int err, done = 0;
1287 
1288     for (i=1; i<c->ntoks && !done; i++) {
1289 	tok = &c->toks[i];
1290 	if (token_done(tok)) {
1291 	    continue;
1292 	}
1293 	if (tok->type == TOK_OPT) {
1294 	    pos = i;
1295 	    err = assemble_option_flag(c, tok, optflag, &i, 1);
1296 	    if (err) {
1297 		break;
1298 	    }
1299 	    if (!strcmp(optflag, "remove")) {
1300 		c->opt |= OPT_R;
1301 		done = 1;
1302 	    } else if (!strcmp(optflag, "purge")) {
1303 		c->opt |= OPT_P;
1304 		done = 1;
1305 	    }
1306 	    if (done) {
1307 		for (j=pos; j<=i; j++) {
1308 		    c->toks[j].flag |= TOK_DONE;
1309 		}
1310 	    }
1311 	}
1312     }
1313 }
1314 
/* Process the option tokens on the command line. Tokens are
   first marked syntactically via mark_option_tokens(); each
   long option is then validated with valid_long_opt() and each
   character of a short-option token with valid_short_opt(),
   and the flags of valid options are OR'd into c->opt. A value
   attached to a long option is handled by handle_option_value().

   For validation purposes c->ci is temporarily replaced by the
   block context (for "end" within a context) or by the target
   command (for "setopt"); it is restored before returning.

   Options stored via "setopt" are retrieved only if no error
   has been recorded, either in the local error code or in
   c->err.

   Returns 0 on success, non-zero error code on failure.
*/

static int check_command_options (CMD *c)
{
    cmd_token *tok;
    char optflag[OPTLEN];
    OptStatus status;
    gretlopt opt;
    int save_ci = c->ci;
    int i, j, n;
    int err = 0;

    mark_option_tokens(c);

    if (c->ci == END && c->context) {
	c->ci = c->context;
    } else if (c->ci == SETOPT) {
	c->ci = gretl_command_number(c->param);
    } else if (c->ci == GNUPLOT) {
	handle_legacy_gnuplot_options(c);
    } else if (c->ci == PKG && (c->opt & OPT_B)) {
	handle_legacy_install_options(c);
    }

    for (i=1; i<c->ntoks && !err; i++) {
	tok = &c->toks[i];
	if (token_done(tok)) {
	    continue;
	}
	if (tok->type == TOK_OPT) {
	    /* long-form option, possibly with attached value */
	    err = assemble_option_flag(c, tok, optflag, &i, 0);
	    if (!err) {
		opt = valid_long_opt(c->ci, optflag, &status);
		if (opt == OPT_NONE) {
		    gretl_errmsg_sprintf(_("Invalid option '--%s'"), optflag);
		    err = E_BADOPT;
		} else {
		    err = handle_option_value(c, i, opt, status);
		    if (!err) {
			c->opt |= opt;
		    }
		}
	    }
	} else if (tok->type == TOK_SOPT) {
	    /* short-form option(s) */
	    tok->flag |= TOK_DONE;
	    n = strlen(tok->s);
	    for (j=0; j<n && !err; j++) {
		opt = valid_short_opt(c->ci, tok->s[j]);
		if (opt == OPT_NONE) {
		    /* error message handled in options.c */
		    err = E_BADOPT;
		} else {
		    c->opt |= opt;
		}
	    }
	}
    }

    /* An invalid option is recorded in the local @err only, not
       in c->err, so both must be checked before we go fetching
       stored options.
    */
    if (!err && !c->err && save_ci != SETOPT) {
	/* Retrieve any options put in place via "setopt" */
	maybe_get_stored_options(c->ci, &c->opt);
    }

    c->ci = save_ci;

    return err;
}
1382 
1383 /* Get the 0-based token position of the 'real' command argument
1384    in 1-based position @k (i.e. k = 1 gets first real arg),
1385    or -1 if none.
1386 */
1387 
real_arg_index(CMD * c,int k)1388 static int real_arg_index (CMD *c, int k)
1389 {
1390     int i, i0 = c->cstart + k;
1391 
1392     for (i=i0; i<c->ntoks; i++) {
1393 	if (!(c->toks[i].flag & TOK_DONE)) {
1394 	    return i;
1395 	}
1396     }
1397 
1398     return -1;
1399 }
1400 
1401 /* Get the 0-based token position of the last command token
1402    that is not already handled (or -1 if none).
1403 */
1404 
last_arg_index(CMD * c)1405 static int last_arg_index (CMD *c)
1406 {
1407     int i;
1408 
1409     for (i=c->ntoks-1; i>c->cstart; i--) {
1410 	if (!(c->toks[i].flag & TOK_DONE)) {
1411 	    return i;
1412 	}
1413     }
1414 
1415     return -1;
1416 }
1417 
1418 /* Get the 0-based token position of the first command token
1419    that is not already handled (or -1 if none).
1420 */
1421 
first_unused_arg_index(CMD * c)1422 static int first_unused_arg_index (CMD *c)
1423 {
1424     int i;
1425 
1426     for (i=c->cstart; i<c->ntoks; i++) {
1427 	if (!(c->toks[i].flag & TOK_DONE)) {
1428 	    return i;
1429 	}
1430     }
1431 
1432     return -1;
1433 }
1434 
1435 /* Record the content of the token at @pos as either
1436    param or parm2, depending on @i.
1437 */
1438 
token_to_param(CMD * c,int pos,int i)1439 static int token_to_param (CMD *c, int pos, int i)
1440 {
1441     char *s = c->toks[pos].s;
1442 
1443     if (i == 1) {
1444 	c->param = s;
1445     } else if (i == 2) {
1446 	c->parm2 = s;
1447     } else {
1448 	c->err = E_PARSE;
1449     }
1450 
1451     return c->err;
1452 }
1453 
/* Return 1 if @tok is acceptable as the parameter following
   a legacy "dash" field, otherwise 0: a name or string token
   always qualifies; a "/" symbol token qualifies only for the
   TABPRINT and EQNPRINT commands.
*/

static int may_be_dash_parm (cmd_token *tok, CMD *c)
{
    int printcmd = (c->ci == TABPRINT || c->ci == EQNPRINT);

    if (tok->type == TOK_NAME || tok->type == TOK_STRING) {
	return 1;
    }

    return printcmd && tok->type == TOK_SYMB && !strcmp(tok->s, "/");
}
1465 
/* legacy: look for, e.g., "-f filename". We scan the tokens to
   the right of c->cstart for a three-part sequence: a dash that
   is not joined on its left; a name token joined to the dash
   whose string equals @s; and a following token that is not
   joined and that satisfies may_be_dash_parm(). On success the
   dash and name tokens are marked as done and the position of
   the third (parameter) token is returned; that token itself is
   not marked as done here. Returns -1 if no such sequence is
   found.
*/

static int dash_char_index (CMD *c, const char *s)
{
    cmd_token *tok;
    int step = 0; /* number of parts of the sequence matched so far */
    int i;

    for (i=c->cstart+1; i<c->ntoks; i++) {
	tok = &c->toks[i];
	if (step == 0) {
	    /* dash, freestanding to left */
	    if (!token_done(tok) && tok->type == TOK_DASH &&
		!token_joined(tok)) {
		step = 1;
	    }
	} else if (step == 1) {
	    /* suitable char token stuck to dash */
	    if (!token_done(tok) && tok->type == TOK_NAME &&
		token_joined(tok) && !strcmp(tok->s, s)) {
		step = 2;
	    } else {
		/* no match: start over from the next token */
		step = 0;
	    }
	} else if (step == 2) {
	    /* suitable following string token */
	    if (!token_done(tok) && !token_joined(tok) &&
		may_be_dash_parm(tok, c)) {
		mark_token_done(c->toks[i-1]);
		mark_token_done(c->toks[i-2]);
		return i;
	    } else {
		/* no match: start over from the next token */
		step = 0;
	    }
	}
    }

    return -1;
}
1505 
1506 /* handle TABPRINT and EQNPRINT */
1507 
legacy_get_filename(CMD * c)1508 static int legacy_get_filename (CMD *c)
1509 {
1510     int pos = dash_char_index(c, "f");
1511 
1512     if (pos > 0) {
1513 	if (c->toks[pos].type == TOK_STRING) {
1514 	    c->param = c->toks[pos].s;
1515 	    mark_token_done(c->toks[pos]);
1516 	} else if (next_joined_token(c, pos) == NULL) {
1517 	    c->param = c->toks[pos].s;
1518 	    mark_token_done(c->toks[pos]);
1519 	} else {
1520 	    c->param = merge_toks_l_to_r(c, pos);
1521 	}
1522     }
1523 
1524     return c->err;
1525 }
1526 
/* For the "dataset" command: work out the operation code from
   c->param and adjust c->ciflags to suit the operation. If
   the operation is not recognized c->err is set to E_PARSE;
   if no error is recorded the operation code is stored in
   c->auxint. Returns c->err.
*/

static int handle_datamod_param (CMD *c)
{
    int op = dataset_op_from_string(c->param);

    /* some nasty legacy stuff here: we should probably use
       options to clean this up */

    if (op == DS_NONE) {
	c->err = E_PARSE;
    } else if (op == DS_SORTBY || op == DS_DSORTBY) {
	/* we now need a list, only */
	c->ciflags = CI_PARM1 | CI_LIST;
    } else if (op == DS_RENUMBER) {
	/* we need a one-member list plus param */
	c->ciflags = CI_PARM1 | CI_LIST | CI_LLEN1 | CI_PARM2;
    } else if (op == DS_COMPACT) {
	/* dataset compact: the second param may contain
	   two fields, as in "4 last", so parse it as a
	   case of 'extra'
	*/
	c->ciflags = CI_PARM1 | CI_EXTRA;
    } else if (op == DS_TRANSPOSE || op == DS_CLEAR) {
	/* no more fields wanted */
	c->ciflags = CI_PARM1;
    } else {
	/* all other cases: no list wanted */
	c->ciflags &= ~CI_LIST;
    }

    if (c->err == 0) {
	c->auxint = op;
    }

    return c->err;
}
1564 
rebrace_string(const char * s,int * err)1565 static char *rebrace_string (const char *s, int *err)
1566 {
1567     char *ret = malloc(strlen(s) + 3);
1568 
1569     if (ret == NULL) {
1570 	*err = E_ALLOC;
1571     } else {
1572 	sprintf(ret, "{%s}", s);
1573     }
1574 
1575     return ret;
1576 }
1577 
1578 static int
looks_like_list_token(CMD * c,cmd_token * tok,const DATASET * dset)1579 looks_like_list_token (CMD *c, cmd_token *tok, const DATASET *dset)
1580 {
1581     if (tok->type != TOK_QUOTED) {
1582 	/* heuristic for a token that forms part of a series
1583 	   list: starts with a digit (series ID?), contains
1584 	   '*' (wildcard spec), or is the name of a series
1585 	*/
1586 	if (isdigit(*tok->s) ||
1587 	    strchr(tok->s, '*') != NULL ||
1588 	    current_series_index(dset, tok->s) >= 0) {
1589 	    return 1;
1590 	} else if (get_list_by_name(tok->s)) {
1591 	    /* should be treated as list, not param, iff
1592 	       this is a genuine "delete" command, not
1593 	       "list foo delete"
1594 	    */
1595 	    if (!strcmp(c->toks[c->cstart].s, "delete")) {
1596 		return 1;
1597 	    }
1598 	}
1599     }
1600 
1601     return 0;
1602 }
1603 
1604 /* Get command parameter in first position; may involve
1605    compositing tokens.
1606 */
1607 
get_param(CMD * c,const DATASET * dset)1608 static int get_param (CMD *c, const DATASET *dset)
1609 {
1610     int pos = real_arg_index(c, 1);
1611     cmd_token *tok;
1612 
1613 #if ALLOW_ADDOBS /* sigh */
1614     if (c->ci == DATAMOD && !strcmp(c->toks[0].s, "addobs")) {
1615 	c->param = c->toks[0].s;
1616 	return handle_datamod_param(c);
1617     }
1618 #endif
1619 
1620     if (pos < 0) {
1621 	if (!param_optional(c->ci)) {
1622 	    c->err = E_ARGS;
1623 	    fprintf(stderr, "%s: required param is missing\n",
1624 		    c->toks[c->cstart].s);
1625 	} else if (c->ci == SMPL) {
1626 	    /* allow the null form of "smpl" */
1627 	    c->ciflags &= ~CI_PARM2;
1628 	}
1629 	return c->err;
1630     }
1631 
1632     tok = &c->toks[pos];
1633 
1634     if (c->ci == DELEET && !(c->opt & OPT_L)) {
1635 	/* experimental */
1636 	if (looks_like_list_token(c, tok, dset)) {
1637 	    c->ciflags &= ~CI_PARM1;
1638 	    c->ciflags |= CI_LIST;
1639 	    return 0;
1640 	}
1641     }
1642 
1643     if (tok->type == TOK_CBSTR) {
1644 	/* if param was found in braces, it should
1645 	   probably be passed in braces, but FIXME
1646 	   check for exceptions? */
1647 	c->param = rebrace_string(tok->s, &c->err);
1648 	mark_token_done(c->toks[pos]);
1649     } else if (tok->type == TOK_EVAL) {
1650 	c->param = generate_string(tok->s, NULL, &c->err);
1651 	mark_token_done(c->toks[pos]);
1652     } else if (delimited_type(tok->type)) {
1653 	c->param = tok->s;
1654 	mark_token_done(c->toks[pos]);
1655     } else if (next_joined_token(c, pos) == NULL) {
1656 	c->param = tok->s;
1657 	mark_token_done(c->toks[pos]);
1658     } else {
1659 	c->param = merge_toks_l_to_r(c, pos);
1660     }
1661 
1662     if (c->ci == DATAMOD) {
1663 	/* "dataset" command: a legacy special! */
1664 	handle_datamod_param(c);
1665     } else if (c->ci == SMPL) {
1666 	/* "smpl" command: drop the requirement for a second param
1667 	   if the first is "full" */
1668 	if (!strcmp(c->param, "full")) {
1669 	    c->opt |= OPT_F;
1670 	    c->ciflags ^= CI_PARM2;
1671 	}
1672     } else if (c->ci == HELP) {
1673 	/* allow a second param for "help set ..." */
1674 	if (!strcmp(c->param, "set")) {
1675 	    c->ciflags |= CI_PARM2;
1676 	}
1677     }
1678 
1679     return c->err;
1680 }
1681 
/* Compatibility shim for "install" emulating "pkg": the
   package name already in c->param is moved to c->parm2 and
   c->param is set to a newly allocated action word, namely
   "unload" if OPT_R is set (old --remove), "remove" if OPT_P
   is set (old --purge), or "install" otherwise. The option
   flag acted upon is cleared from c->opt.

   Returns 0 on success, or E_ALLOC if the action word could
   not be allocated.
*/

static int pkg_params_compat (CMD *c)
{
    const char *action;

    /* swap pkgname into second position */
    c->parm2 = c->param;

    if (c->opt & OPT_R) {
	/* compat for old --remove */
	action = "unload";
	c->opt &= ~OPT_R;
    } else if (c->opt & OPT_P) {
	/* compat for old --purge */
	action = "remove";
	c->opt &= ~OPT_P;
    } else {
	/* implicit alternative */
	action = "install";
    }

    c->param = gretl_strdup(action);

    /* don't report success if the copy failed */
    return (c->param == NULL) ? E_ALLOC : 0;
}
1702 
/* Get the second command parameter, recorded as c->parm2; may
   involve compositing tokens. By default this is taken from the
   last token not yet handled, but if @options_later is non-zero
   the first unhandled token is used instead. If no candidate
   token is found, E_ARGS is flagged unless one of the special
   cases below applies or parm2 is optional for the command.
   Returns c->err.
*/

static int get_parm2 (CMD *c, int options_later)
{
    cmd_token *tok;
    int pos;

    pos = options_later ? first_unused_arg_index(c) :
	last_arg_index(c);

    if (pos < 0) {
	if (c->ci == SMPL && c->opt == 0) {
	    /* backward-compatible slop factor: allow missing ';'
	       in second place?
	    */
	    ;
	} else if (c->ci == PKG && (c->opt & OPT_B)) {
	    /* "install" emulating "pkg" */
	    c->err = pkg_params_compat(c);
	    c->opt ^= OPT_B; /* scrub temporary option */
	} else if (c->ci == DATAMOD && c->auxint == DS_EXPAND) {
	    ; /* OK, parm2 is optional in this case */
	} else if (!parm2_optional(c->ci)) {
	    c->err = E_ARGS;
	    fprintf(stderr, "%s: required parm2 is missing\n",
		    c->toks[c->cstart].s);
	}
	return c->err;
    }

    tok = &c->toks[pos];

    if (c->ciflags & CI_VARGS) {
	/* check for trailing comma: if it's present, the
	   token we want will precede it */
	if (tok->type == TOK_COMMA) {
	    tok->flag |= TOK_DONE;
	    pos--;
	} else if (vargs_optional(c->ci)) {
	    /* no comma, so no varargs: drop the flag (it is
	       known to be set at this point) */
	    c->ciflags ^= CI_VARGS;
	} else {
	    c->err = E_PARSE;
	    return c->err;
	}
    }

    /* re-fetch the token, since @pos may have been adjusted */
    tok = &c->toks[pos];

    /* revised 2014-10-11 */
    if (token_joined(tok)) {
	/* joined on its left: composite leftward from here */
	c->parm2 = merge_toks_r_to_l(c, pos);
    } else {
	c->parm2 = tok->s;
	if ((tok->flag & TOK_QUOTED) && c->ci == MODPRINT) {
	    /* signal quoted status of string literal */
	    c->opt |= OPT_L;
	}
	tok->flag |= TOK_DONE;
    }

    return c->err;
}
1767 
1768 /* legacy: handle SETINFO fields */
1769 
get_quoted_dash_fields(CMD * c,const char * s)1770 static int get_quoted_dash_fields (CMD *c, const char *s)
1771 {
1772     char test[2] = {0};
1773     int pos = real_arg_index(c, 2);
1774     int i;
1775 
1776     if (pos < 0) {
1777 	return 0;
1778     }
1779 
1780     for (i=0; s[i]; i++) {
1781 	/* loop across "dash fields" */
1782 	test[0] = s[i];
1783 	pos = dash_char_index(c, test);
1784 	if (pos > 0) {
1785 	    if (c->toks[pos].type == TOK_STRING) {
1786 		token_to_param(c, pos, i+1);
1787 		mark_token_done(c->toks[pos]);
1788 	    } else {
1789 		c->err = E_PARSE;
1790 	    }
1791 	}
1792     }
1793 
1794     return c->err;
1795 }
1796 
/* Return 1 if the first real argument of the command exists
   and is a token of type TOK_STRING, otherwise 0.
*/

static int first_arg_quoted (CMD *c)
{
    int pos = real_arg_index(c, 1);

    return pos >= 0 && c->toks[pos].type == TOK_STRING;
}
1807 
1808 /* Count instances of list separator, ';', in the
1809    command line.
1810 */
1811 
cmd_get_sepcount(CMD * c)1812 static int cmd_get_sepcount (CMD *c)
1813 {
1814     int i, n = 0;
1815 
1816     for (i=c->cstart+1; i<c->ntoks; i++) {
1817 	if (c->toks[i].type == TOK_SEMIC) {
1818 	    n++;
1819 	}
1820     }
1821 
1822     return n;
1823 }
1824 
1825 /* Convert token @k to an integer. */
1826 
token_to_int(CMD * c,int k)1827 static int token_to_int (CMD *c, int k)
1828 {
1829     cmd_token *tok = &c->toks[k];
1830     int ret = -1;
1831 
1832     if (tok->type == TOK_INT) {
1833 	ret = atoi(tok->s);
1834 	tok->flag |= TOK_DONE;
1835     } else if (tok->type == TOK_NAME) {
1836 	double x = get_scalar_value_by_name(tok->s, &c->err);
1837 
1838 	if (!c->err) {
1839 	    if (x > 0 && x < INT_MAX) {
1840 		tok->flag |= TOK_DONE;
1841 		ret = x;
1842 	    } else {
1843 		c->err = E_INVARG;
1844 	    }
1845 	}
1846     } else {
1847 	c->err = E_INVARG;
1848     }
1849 
1850     return ret;
1851 }
1852 
/* Return the greatest value among the members of @list, where
   list[0] holds the number of members and the members occupy
   positions 1 to list[0]. If @list is NULL or has no members
   the return value is 0 (nothing beyond list[0] is read in
   that case).
*/

static int list_max (const int *list)
{
    int i, lmax;

    if (list == NULL || list[0] < 1) {
	/* no members to inspect */
	return 0;
    }

    lmax = list[1];

    for (i=2; i<=list[0]; i++) {
	if (list[i] > lmax) {
	    lmax = list[i];
	}
    }

    return lmax;
}
1865 
/* Obtain an auxiliary list from @tok: if the token is of type
   TOK_CBSTR its string is converted via gretl_list_from_string();
   otherwise its string is taken as the name of a matrix, which
   is passed to gretl_auxlist_from_vector(). Any error code is
   written to @err.
*/

static int *get_auxlist (cmd_token *tok, int *err)
{
    gretl_matrix *vec;

    if (tok->type == TOK_CBSTR) {
	return gretl_list_from_string(tok->s, err);
    }

    vec = get_matrix_by_name(tok->s);

    return gretl_auxlist_from_vector(vec, err);
}
1880 
/* Get the lag order for the VAR command from token @k. The token
   may be an integer; the name of a scalar (whose value must be
   greater than 0 and less than INT_MAX); or something from which
   an auxiliary list can be obtained via get_auxlist(), namely a
   brace-delimited string or a name for which the scalar lookup
   fails. When c->auxlist is non-NULL the return value is the
   maximum of that list. The token is marked as done if no error
   is recorded. Returns the order, or -1 if none was obtained;
   errors are flagged via c->err.
*/

static int get_VAR_order (CMD *c, int k)
{
    cmd_token *tok = &c->toks[k];
    int ret = -1;

    if (tok->type == TOK_INT) {
	ret = atoi(tok->s);
    } else if (tok->type == TOK_NAME) {
	double x = get_scalar_value_by_name(tok->s, &c->err);

	if (!c->err) {
	    if (x > 0 && x < INT_MAX) {
		ret = x;
	    } else {
		gretl_errmsg_sprintf(_("Invalid lag order %g"), x);
		c->err = E_INVARG;
	    }
	} else {
	    /* could be a non-scalar matrix */
	    /* NOTE(review): this relies on gretl_error_clear()
	       returning a value suitable for resetting c->err
	       (presumably 0) -- TODO confirm against its
	       declaration */
	    c->err = gretl_error_clear();
	    c->auxlist = get_auxlist(tok, &c->err);
	}
    } else if (tok->type == TOK_CBSTR) {
	c->auxlist = get_auxlist(tok, &c->err);
    } else {
	c->err = E_INVARG;
    }

    if (!c->err) {
	tok->flag |= TOK_DONE;
	if (c->auxlist != NULL) {
	    /* "gappy" lags: the order is the greatest lag */
	    ret = list_max(c->auxlist);
	}
    }

    return ret;
}
1918 
/* Merge the joined tokens starting at position @k into a
   single identifier and look it up as a named scalar; the
   value must be greater than 0 and less than INT_MAX,
   otherwise E_INVARG is flagged. Returns the value as an
   integer, or -1 on failure.
*/

static int get_bundled_int (CMD *c, int k)
{
    char *ident = merge_toks_l_to_r(c, k);
    int val = -1;
    double x;

    if (ident == NULL) {
	return val;
    }

    x = get_scalar_value_by_name(ident, &c->err);

    if (c->err) {
	; /* lookup failed: leave val at -1 */
    } else if (x > 0 && x < INT_MAX) {
	val = x;
    } else {
	c->err = E_INVARG;
    }

    free(ident);

    return val;
}
1939 
/* Some commands require an integer order as the first
   argument, but we also have the cases where an order
   in first argument position is optional, viz:

   var order ...
   modtest [ order ]
   lags [ order ; ] list

   On success the order is recorded in c->order. Returns
   c->err, or 0 directly when an optional order is absent.
*/

static int get_command_order (CMD *c)
{
    int pos = real_arg_index(c, 1);
    int try_bundle = 0;

    if (pos >= 0) {
	cmd_token *ntok = next_joined_token(c, pos);

	/* In general, nothing should be "stuck onto" the
	   order specifier; however, we have accepted ";"
	   without an intervening space, and we might be
	   given a bundle member in dot notation.
	*/
	if (ntok != NULL) {
	    if (ntok->type == TOK_SEMIC) {
		; /* OK */
	    } else if (ntok->type == TOK_DOT) {
		try_bundle = 1; /* try handling this */
	    } else {
		c->err = E_PARSE;
		return c->err;
	    }
	}
    }

    if (c->ci == MODTEST && pos < 0) {
	/* order is optional, not present, OK */
	return 0;
    }

    if (c->ci == LAGS && cmd_get_sepcount(c) == 0) {
	/* order is optional, not present, OK */
	return 0;
    }

    if (pos < 0) {
	/* a required order is missing */
	c->err = E_ARGS;
    } else if (try_bundle) {
	c->order = get_bundled_int(c, pos);
    } else if (c->ci == VAR) {
	/* order can be special, "gappy" */
	c->order = get_VAR_order(c, pos);
    } else {
	c->order = token_to_int(c, pos);
    }

    return c->err;
}
1997 
1998 /* special for VECM only, so far */
1999 
get_vecm_rank(CMD * c)2000 static int get_vecm_rank (CMD *c)
2001 {
2002     int pos = real_arg_index(c, 2);
2003 
2004     if (pos < 0) {
2005 	c->err = E_ARGS;
2006     } else {
2007 	cmd_token *nt = next_joined_token(c, pos);
2008 
2009 	if (nt != NULL && nt->type == TOK_DOT) {
2010 	    c->auxint = get_bundled_int(c, pos);
2011 	} else {
2012 	    c->auxint = token_to_int(c, pos);
2013 	}
2014     }
2015 
2016     return c->err;
2017 }
2018 
2019 /* get an optional, trailing "order" field */
2020 
get_optional_order(CMD * c)2021 static int get_optional_order (CMD *c)
2022 {
2023     int pos = first_unused_arg_index(c);
2024 
2025     if (pos > 0) {
2026 	cmd_token *nt = next_joined_token(c, pos);
2027 
2028 	if (nt != NULL && nt->type == TOK_DOT) {
2029 	    c->order = get_bundled_int(c, pos);
2030 	} else {
2031 	    c->order = token_to_int(c, pos);
2032 	}
2033     }
2034 
2035     return c->err;
2036 }
2037 
2038 /* Stuff that can come before a command proper: the "catch"
2039    keyword or assignment to a named object.
2040 */
2041 
handle_command_preamble(CMD * c)2042 static int handle_command_preamble (CMD *c)
2043 {
2044     int pos = 0;
2045 
2046     if (c->toks[0].type == TOK_CATCH) {
2047 	if (not_catchable(c->ci)) {
2048 	    gretl_errmsg_set(_("catch: cannot be applied to this command"));
2049 	    c->err = E_DATA;
2050 	    return c->err;
2051 	} else {
2052 	    set_gretl_errno(0);
2053 	    gretl_error_clear();
2054 	    c->flags |= CMD_CATCH;
2055 	    mark_token_done(c->toks[0]);
2056 	    pos = 1;
2057 	}
2058     }
2059 
2060     if (c->ntoks > pos + 1 && c->toks[pos+1].type == TOK_ASSIGN) {
2061 	char *s = c->toks[pos].s;
2062 	int n = strlen(s);
2063 
2064 	if (n >= MAXSAVENAME) {
2065 	    gretl_errmsg_set(_("savename is too long"));
2066 	    c->err = E_DATA;
2067 	} else {
2068 	    strcpy(c->savename, s);
2069 	    mark_token_done(c->toks[pos]);
2070 	    mark_token_done(c->toks[pos+1]);
2071 	}
2072     }
2073 
2074     return c->err;
2075 }
2076 
2077 #if CDEBUG
2078 
/* Debugging: print c->vstart to stderr, labeled as an
   expression if the CI_EXPR flag is set, or else as varargs
   if CI_VARGS is set; otherwise print nothing.
*/

static void vstart_line_out (CMD *c)
{
    if (c->ciflags & CI_EXPR) {
	fprintf(stderr, "* expr: '%s'\n", c->vstart);
	return;
    }

    if (c->ciflags & CI_VARGS) {
	fprintf(stderr, "* varargs: '%s'\n", c->vstart);
    }
}
2087 
my_printlist(const int * list,const char * s)2088 static void my_printlist (const int *list, const char *s)
2089 {
2090     int i;
2091 
2092     if (list == NULL) {
2093 	fprintf(stderr, "%s is NULL\n", s);
2094 	return;
2095     }
2096 
2097     fprintf(stderr, "%s: %d : ", s, list[0]);
2098 
2099     for (i=1; i<=list[0]; i++) {
2100 	if (list[i] == LISTSEP) {
2101 	    fputs("; ", stderr);
2102 	} else {
2103 	    fprintf(stderr, "%d ", list[i]);
2104 	}
2105     }
2106 
2107     fputc('\n', stderr);
2108 }
2109 
2110 # if CDEBUG > 1
2111 
/* Debugging: write into @s a description of token @i of
   @toks, composed of a label for the token's type, a note
   if the token is joined on the left (not shown for option
   tokens), and the token's status (ignored, handled or not
   handled). Returns @s, which must be big enough to hold
   the description.
*/

static char *tokstring (char *s, cmd_token *toks, int i)
{
    /* type labels, in order of precedence; the NULL label
       marks TOK_OPTVAL, which needs special treatment */
    const struct {
	int type;
	const char *label;
    } known[] = {
	{ TOK_OPT,     "option" },
	{ TOK_SOPT,    "short-option" },
	{ TOK_STRING,  "quoted" },
	{ TOK_NUMBER,  "number" },
	{ TOK_INT,     "integer" },
	{ TOK_OPTDASH, "option-leader" },
	{ TOK_DASH,    "dash" },
	{ TOK_DOT,     "dot" },
	{ TOK_COMMA,   "comma" },
	{ TOK_SEMIC,   "separator" },
	{ TOK_COLON,   "colon" },
	{ TOK_DDASH,   "double-dash" },
	{ TOK_DPLUS,   "double-plus" },
	{ TOK_DDOT,    "double-dot" },
	{ TOK_EQUALS,  "equals" },
	{ TOK_EQMOD,   "modified-equals" },
	{ TOK_OPTEQ,   "opt-equals" },
	{ TOK_OPTVAL,  NULL },
	{ TOK_ASSIGN,  "assign" },
	{ TOK_SYMB,    "symbol/operator" },
	{ TOK_PRSTR,   "paren-delimited" },
	{ TOK_CBSTR,   "brace-delimited" },
	{ TOK_BRSTR,   "bracket-delimited" },
	{ TOK_DOLSTR,  "$-variable" },
	{ TOK_EVAL,    "eval-token" }
    };
    cmd_token *tok = &toks[i];
    const char *label = "regular";
    const char *status;
    int k, nk = sizeof known / sizeof known[0];

    for (k=0; k<nk; k++) {
	if (tok->type == known[k].type) {
	    label = known[k].label;
	    break;
	}
    }

    if (label == NULL) {
	/* option value: distinguish the quoted case */
	label = (tok->flag & TOK_QUOTED) ? "quoted option-value" :
	    "option-value";
    }

    strcpy(s, label);

    if (tok->type != TOK_OPT && tok->type != TOK_SOPT &&
	tok->type != TOK_OPTEQ && tok->type != TOK_OPTVAL &&
	token_joined(tok)) {
	strcat(s, ", joined on left");
    }

    if (token_ignored(tok)) {
	status = ", ignored!";
    } else if (token_done(tok)) {
	status = ", handled";
    } else {
	status = ", not handled";
    }

    strcat(s, status);

    return s;
}
2192 
2193 # endif
2194 
/* Debugging aid: write to stderr the names of the option flags set
   in @opt, from OPT_A up to and including OPT_Y, separated by '|'.
   The flag value is doubled and the letter advanced in step, so
   this relies on the OPT_ flags being successive single-bit values
   in alphabetical order.
*/

static void print_option_flags (gretlopt opt)
{
    const char *letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    gretlopt bit;
    int shown = 0;

    for (bit=OPT_A; bit<=OPT_Y; bit*=2, letter++) {
        if (!(opt & bit)) {
            continue;
        }
        if (shown > 0) {
            fputc('|', stderr);
        }
        fprintf(stderr, "OPT_%c", *letter);
        shown++;
    }
}
2211 
/* Debugging aid: report on stderr the outcome of parsing for @c.
   With CDEBUG > 1 each token is listed first, along with its
   tokstring() description. Then come any "catch" or assignment
   prefix, the command word and block context (or a note that the
   word was not understood), the option status, and @param, @parm2,
   @order and @auxint where set. If an error is recorded it is
   printed and the report stops there; otherwise the list (or the
   vstart line) and any auxiliary list follow.
*/

static void print_tokens (CMD *c)
{
    cmd_token *toks = c->toks;
    const char *inh;

# if CDEBUG > 1
    char desc[48];
    int k;

    if (c->ciflags & (CI_EXPR | CI_ADHOC)) {
        fprintf(stderr, "tokens examined so far:\n");
    } else {
        fprintf(stderr, "tokens:\n");
    }

    for (k=0; k<c->ntoks; k++) {
        fprintf(stderr, "%3d: '%s' (%s)\n", k, toks[k].s,
                tokstring(desc, toks, k));
    }
# endif

    /* inside a block, or on "end", prefixes carry over */
    inh = (c->context || c->ci == END)? " (inherited)" : "";

    if (c->flags & CMD_CATCH) {
        fprintf(stderr, "* catching errors%s\n", inh);
    } else if (*c->savename != '\0') {
        fprintf(stderr, "* assignment to '%s'%s\n",
                c->savename, inh);
    }

    if (c->ci <= 0) {
        fprintf(stderr, "* '%s': don't understand\n",
                toks[c->cstart].s);
    } else if (c->context) {
        fprintf(stderr, "* command: '%s' (%d), context = '%s' (%d)\n",
                gretl_command_word(c->ci), c->ci,
                gretl_command_word(c->context), c->context);
    } else {
        fprintf(stderr, "* command: '%s' (%d), context = null\n",
                gretl_command_word(c->ci), c->ci);
    }

    if (c->err == E_BADOPT) {
        fprintf(stderr, "* command contains invalid option\n");
    } else if (c->opt) {
        fprintf(stderr, "* command option(s) valid (");
        print_option_flags(c->opt);
        fprintf(stderr, ")\n");
    }

    if (c->param != NULL) {
        fprintf(stderr, "* param: '%s'\n", c->param);
    }
    if (c->parm2 != NULL) {
        fprintf(stderr, "* parm2: '%s'\n", c->parm2);
    }
    if (c->order != 0) {
        fprintf(stderr, "* order = %d\n", c->order);
    }
    if (c->auxint != 0) {
        fprintf(stderr, "* auxint = %d\n", c->auxint);
    }

    if (c->err) {
        /* nothing further to show once parsing has failed */
        fprintf(stderr, "*** error = %d (%s)\n\n", c->err,
                errmsg_get_with_default(c->err));
        return;
    }

    if (c->ciflags & CI_LIST) {
        my_printlist(c->list, "list");
    } else if (c->vstart != NULL) {
        vstart_line_out(c);
    }

    if (c->auxlist != NULL) {
        my_printlist(c->auxlist, "auxlist");
    }

    fputc('\n', stderr);
}
2302 
2303 #endif /* CDEBUG */
2304 
/* Backward-compatibility hook for check_for_stray_tokens(): decide
   whether unhandled token @i of @c should be tolerated. The one
   case accepted is "outfile" with exactly the OPT_C (--close)
   option, where a leftover token is taken to be a redundant
   filename; a note goes to stderr and the token is marked as done.
   Returns 1 if the token was accepted, 0 otherwise.
*/

static int legacy_accept_strays (CMD *c, int i)
{
    int accepted = (c->ci == OUTFILE && c->opt == OPT_C);

    if (accepted) {
        /* redundant parameter for outfile --close ? */
        fprintf(stderr, "+++ outfile --close: ignoring "
                "redundant filename\n");
        mark_token_done(c->toks[i]);
    }

    return accepted;
}
2317 
/* Once regular parsing is finished, flag as E_PARSE the first token
   of @c that has not been marked as done and is not excused by
   legacy_accept_strays(). Commands carrying CI_EXPR or CI_ADHOC
   are exempt from the check. Returns @c->err.
*/

static int check_for_stray_tokens (CMD *c)
{
    int i;

    if (c->ciflags & (CI_EXPR | CI_ADHOC)) {
        /* the remaining text is handled by a different parser */
        return c->err;
    }

    for (i=0; i<c->ntoks; i++) {
        cmd_token *tok;

        if (c->err) {
            break;
        }
        tok = &c->toks[i];
        if (token_done(tok) || legacy_accept_strays(c, i)) {
            continue;
        }
        gretl_errmsg_sprintf(_("Parse error at unexpected token '%s'"),
                             tok->s);
        c->err = E_PARSE;
    }

    return c->err;
}
2336 
check_list_sepcount(int ci,int nsep)2337 static int check_list_sepcount (int ci, int nsep)
2338 {
2339     int minsep, maxsep;
2340     int err = 0;
2341 
2342     maxsep = get_sep_max(ci, &minsep);
2343 
2344     if (nsep < minsep) {
2345 	err = E_ARGS;
2346     } else if (nsep > maxsep) {
2347 	gretl_errmsg_sprintf(_("The symbol '%c' is not valid in this context\n"),
2348 			     ';');
2349 	err = E_INVARG;
2350     }
2351 
2352 #if CDEBUG
2353     if (err) {
2354 	fprintf(stderr, "error %d from check_list_sepcount\n", err);
2355     }
2356 #endif
2357 
2358     return err;
2359 }
2360 
2361 /* Determine if the command-line is a "genr"-type expression,
2362    which will be directed to a separate parser -- this gets
2363    invoked if we haven't been able to find a recognizable
2364    command-word.
2365 */
2366 
test_for_genr(CMD * c,int i,char cnext,DATASET * dset)2367 static int test_for_genr (CMD *c, int i, char cnext, DATASET *dset)
2368 {
2369     cmd_token *toks = c->toks;
2370     char *s = toks[i].s;
2371     int j = c->ntoks - 1;
2372 
2373     if (j > i && (toks[j].type == TOK_EQUALS || toks[j].type == TOK_EQMOD)) {
2374 	c->ci = GENR;
2375     } else if (dset != NULL && cnext != '(' && current_series_index(dset, s) >= 0) {
2376 	c->ci = GENR;
2377     } else if (cnext != '(' && gretl_is_user_var(s)) {
2378 	c->ci = GENR;
2379     } else if (toks[i].type == TOK_NAME && c->ntoks > i + 1) {
2380 	cmd_token *nexttok = &toks[i+1];
2381 
2382 	if ((nexttok->type == TOK_DPLUS ||
2383 	     nexttok->type == TOK_DDASH) &&
2384 	    token_joined(nexttok)) {
2385 	    /* increment/decrement */
2386 	    c->ci = GENR;
2387 	} else if (nexttok->type == TOK_EQUALS || nexttok->type == TOK_EQMOD) {
2388 	    /* assignment token in second place */
2389 	    c->ci = GENR;
2390 	} else if (nexttok->type == TOK_BRSTR && token_joined(nexttok)) {
2391 	    /* assignment to array or series element(s) */
2392 	    c->ci = GENR;
2393 	} else if (function_lookup(s) || get_user_function_by_name(s) ||
2394 		   is_function_alias(s)) {
2395 	    /* function call, no assignment */
2396 	    c->ci = GENR;
2397 	    c->opt |= OPT_O;
2398 	}
2399 #ifdef USE_RLIB
2400 	else if (j == 1 && toks[j].type == TOK_DOT &&
2401 		 libset_get_bool(R_FUNCTIONS) &&
2402 		 toks[0].type == TOK_NAME && !strcmp(toks[0].s, "R")) {
2403 	    /* R function call, no assignment */
2404 	    c->ci = GENR;
2405 	    c->opt |= OPT_O;
2406 	}
2407 #endif
2408     }
2409 
2410     return c->ci;
2411 }
2412 
/* Register a warning that @bad is obsolete and @good should be
   used instead. @command selects the wording: non-zero describes
   @bad as a "command", zero as a "construction". If @good contains
   "()" it is presented as a function rather than quoted.
*/

static void set_deprecation (const char *bad, const char *good,
                             int command)
{
    const char *tag = "construction";
    int good_is_func = (strstr(good, "()") != NULL);

    if (command) {
        tag = "command";
    }

    if (good_is_func) {
        gretl_warnmsg_sprintf(_("\"%s\": obsolete %s; please use the function %s"),
                              bad, tag, good);
    } else {
        gretl_warnmsg_sprintf(_("\"%s\": obsolete %s; please use \"%s\""),
                              bad, tag, good);
    }
}
2426 
/* Look up @s as an alias for a command. On a match, return the
   command index and make any associated adjustment to @cmd->opt;
   deprecated aliases also register a warning via set_deprecation().
   Returns 0 if @s is not a known alias.
*/

static int try_for_command_alias (const char *s, CMD *cmd)
{
    /* Plain aliases: the word, the command index, an option flag
       to add (0 for none) and, for deprecated words only, the
       recommended replacement along with the @command argument
       for set_deprecation().
    */
    static const struct {
        const char *word;
        int ci;
        gretlopt opt;
        const char *good;
        int command;
    } aliases[] = {
        { "ls",        VARLIST,  0,     NULL,             0 },
        { "pooled",    OLS,      0,     "ols",            1 },
        /* "equations": reached only when compiling loop */
        { "equations", EQUATION, OPT_M, NULL,             0 },
        { "launch",    SHELL,    OPT_A, NULL,             0 },
        { "fcasterr",  FCAST,    0,     "fcast",          1 },
        /* "install": OPT_B is for back-compat */
        { "install",   PKG,      OPT_B, NULL,             0 },
#if ALLOW_ADDOBS
        { "addobs",    DATAMOD,  0,     "dataset addobs", 0 },
#endif
        { "continue",  FUNDEBUG, OPT_C, NULL,             0 },
        { "next",      FUNDEBUG, OPT_N, NULL,             0 },
        { "undebug",   FUNDEBUG, OPT_Q, NULL,             0 }
    };
    int n_aliases = sizeof aliases / sizeof aliases[0];
    int k;

    if (strcmp(s, "exit") == 0) {
        /* unlike the cases below, the option is assigned outright */
        cmd->opt = OPT_X;
        return QUIT;
    }

    for (k=0; k<n_aliases; k++) {
        if (strcmp(s, aliases[k].word) == 0) {
            if (aliases[k].good != NULL) {
                set_deprecation(aliases[k].word, aliases[k].good,
                                aliases[k].command);
            }
            cmd->opt |= aliases[k].opt;
            return aliases[k].ci;
        }
    }

    if (*s == '!') {
        return SHELL;
    }

    return 0;
}
2472 
/* Return the first character in the command line that follows
   token @i of @cmd, skipping any intervening spaces. The position
   is computed from the token's line pointer plus the length of
   its string; the result is '\0' if nothing follows.
*/

static char peek_next_char (CMD *cmd, int i)
{
    cmd_token *tok = &cmd->toks[i];
    const char *p = tok->lp + strlen(tok->s);

    while (*p == ' ') {
        p++;
    }

    return *p;
}
2481 
/* Look at the text that follows token @i of @cmd, after skipping
   spaces. Returns LOOP if it begins with "loop", FUNC if it begins
   with "function", otherwise 0. Only the leading characters are
   compared, so longer words with these prefixes also match.
*/

static int peek_end_param (CMD *cmd, int i)
{
    cmd_token *tok = &cmd->toks[i];
    const char *p = tok->lp + strlen(tok->s);
    int ret = 0;

    while (*p == ' ') {
        p++;
    }

    if (strncmp(p, "loop", 4) == 0) {
        ret = LOOP;
    } else if (strncmp(p, "function", 8) == 0) {
        ret = FUNC;
    }

    return ret;
}
2497 
2498 #if CDEBUG > 1
2499 
/* Debugging aid: if a command index has been found for @cmd, print
   it to stderr along with the canonical command word; if the word
   actually typed, @s, differs from the canonical one, show that
   too.
*/

static void maybe_report_command_index (CMD *cmd, const char *s)
{
    const char *canon;

    if (cmd->ci <= 0) {
        /* nothing identified, nothing to report */
        return;
    }

    canon = gretl_command_word(cmd->ci);

    fprintf(stderr, "try_for_command_index: ci = %d (%s)",
            cmd->ci, canon);
    if (strcmp(canon, s) != 0) {
        fprintf(stderr, ", actual word = '%s'", s);
    }
    fputc('\n', stderr);
}
2513 
2514 #endif
2515 
2516 /* If we have enough tokens parsed, try to determine the
2517    current command index.
2518 */
2519 
try_for_command_index(CMD * cmd,int i,DATASET * dset,int compmode,int * err)2520 static int try_for_command_index (CMD *cmd, int i,
2521 				  DATASET *dset,
2522 				  int compmode,
2523 				  int *err)
2524 {
2525     cmd_token *toks = cmd->toks;
2526     const char *test = toks[i].s;
2527 
2528     cmd->ci = gretl_command_number(test);
2529 
2530     if (cmd->context && cmd->ci != END) {
2531 	if (cmd->context == FOREIGN || cmd->context == MPI) {
2532 	    /* Do not attempt to parse! Note: we get here only
2533 	       when "compiling" a foreign block into a loop or
2534 	       function.
2535 	    */
2536 	    cmd->ciflags = CI_EXPR;
2537 	    cmd->ci = cmd->context;
2538 	} else {
2539 	    /* We're inside a "native" block of some kind.
2540 	       In that case the line should be passed "as
2541 	       is" to the cumulator for the block, with one
2542 	       exception, namely the "equation" command
2543 	       within a "system" block.
2544 	    */
2545 	    if (cmd->context == SYSTEM && !strcmp(test, "equations")) {
2546 		cmd->ci = EQUATION;
2547 		cmd->opt |= OPT_M;
2548 	    } else if (cmd->context == SYSTEM && cmd->ci == EQUATION) {
2549 		; /* OK */
2550 	    } else {
2551 		cmd->ci = cmd->context;
2552 	    }
2553 	}
2554     }
2555 
2556     if (cmd->ci > 0 && has_function_form(cmd->ci)) {
2557 	/* disambiguate command versus function */
2558 	if (peek_next_char(cmd, i) == '(') {
2559 	    /* must be function form, not command proper */
2560 	    cmd->ci = 0;
2561 	    goto gentest;
2562 	}
2563     } else if (cmd->ci == 0) {
2564 	cmd->ci = try_for_command_alias(test, cmd);
2565     }
2566 
2567 #if CDEBUG > 1
2568     maybe_report_command_index(cmd, test);
2569 #endif
2570 
2571  gentest:
2572 
2573     if (cmd->ci <= 0 && cmd->ntoks < 5) {
2574 	char cnext = peek_next_char(cmd, i);
2575 
2576 	cmd->ci = test_for_genr(cmd, i, cnext, dset);
2577     }
2578 
2579     if (cmd->ci > 0) {
2580 	mark_token_done(toks[i]);
2581 	if (cmd->ci == cmd->context) {
2582 	    cmd->ciflags = CI_EXPR;
2583 	} else {
2584 	    cmd->ciflags = command_get_flags(cmd->ci);
2585 	    if (cmd->ci == EQUATION && (cmd->opt & OPT_M)) {
2586 		/* the system "equations" keyword */
2587 		cmd->ciflags ^= CI_LIST;
2588 		cmd->ciflags |= CI_PARM1;
2589 		cmd->ciflags |= CI_PARM2;
2590 	    }
2591 	    if (cmd->ci == STORE && (cmd->flags & CMD_PROG)) {
2592 		cmd->ciflags ^= CI_LIST;
2593 		cmd->ciflags ^= CI_DOALL;
2594 		cmd->ciflags |= CI_EXTRA;
2595 	    }
2596 	    if (cmd->ci == GENR) {
2597 		GretlType gtype = 0;
2598 
2599 		if (!strcmp(test, "list")) {
2600 		    if (peek_next_char(cmd, i) == '\0') {
2601 			/* just "list" by itself */
2602 			cmd->ci = VARLIST;
2603 			cmd->ciflags = 0;
2604 		    } else {
2605 			/* probably "genr" but might be a special */
2606 			gtype = cmd->gtype = GRETL_TYPE_LIST;
2607 			cmd->ciflags |= CI_LCHK;
2608 		    }
2609 		} else if ((gtype = gretl_get_gen_type(test)) > 0) {
2610 		    cmd->gtype = gtype;
2611 		}
2612 		if (gtype == 0 && get_list_by_name(test)) {
2613 		    cmd->ciflags |= CI_LGEN;
2614 		}
2615 	    }
2616 	    if (compmode && cmd->ci == END) {
2617 		int endci = peek_end_param(cmd, i);
2618 
2619 		if (compmode == FUNC && endci == FUNC) {
2620 		    cmd->flags |= CMD_ENDFUN;
2621 		} else if (endci == LOOP) {
2622 		    gretl_errmsg_set(_("'end loop': did you mean 'endloop'"));
2623 		    gretl_abort_compiling_loop();
2624 		    *err = E_PARSE;
2625 		}
2626 	    }
2627 	}
2628     }
2629 
2630     return cmd->ci;
2631 }
2632 
2633 /* Get a count of any tokens not marked as 'done'. */
2634 
count_remaining_toks(CMD * c)2635 static int count_remaining_toks (CMD *c)
2636 {
2637     int i, n = 0;
2638 
2639     for (i=c->cstart+1; i<c->ntoks; i++) {
2640 	if (!(c->toks[i].flag & TOK_DONE)) {
2641 	    n++;
2642 	}
2643     }
2644 
2645     return n;
2646 }
2647 
/* Handle a vector-style term, @tok, found where an integer was
   expected in the leading portion of a command list.

   If the integer list at @ilistptr is still empty, the term becomes
   @c->auxlist. For ARMA, while the integer list has fewer than 3
   members, a further term is joined onto @c->auxlist with a
   separator. In both cases the maximum of the new term is appended
   to the integer list as a placeholder. Any other occurrence is
   a parse error.

   Returns 0 on success or a non-zero error code. Note that the
   caller assigns the return value to @c->err, so every failure
   must be reported via the return value.
*/

static int process_auxlist_term (CMD *c, cmd_token *tok,
                                 int **ilistptr)
{
    int *ilist = *ilistptr;
    int err = 0;

    if (ilist == NULL) {
        c->auxlist = get_auxlist(tok, &err);
        if (!err) {
            /* add placeholder to @ilist */
            gretl_list_append_term(ilistptr, list_max(c->auxlist));
        }
    } else if (c->ci == ARMA && ilist[0] < 3) {
        int *aux2 = get_auxlist(tok, &err);

        if (!err) {
            int *tmp;

            tmp = gretl_lists_join_with_separator(c->auxlist, aux2);
            if (tmp == NULL) {
                /* report through @err: setting only c->err here
                   would be overwritten by the caller with our
                   (zero) return value
                */
                err = E_ALLOC;
            } else {
                gretl_list_append_term(ilistptr, list_max(aux2));
                free(c->auxlist);
                c->auxlist = tmp;
            }
        }
        free(aux2);
    } else {
        /* we got too many auxlist terms */
        err = E_PARSE;
    }

    return err;
}
2683 
/* Determine whether @tok should be treated as a vector-style
   ("auxlist") term rather than a plain integer. This applies only
   to the ARMA, DPANEL and VAR commands, and only before any list
   separator has been seen (@scount == 0). The token qualifies if
   it is brace-delimited or is the name of an existing matrix.
   Returns 1 if so, otherwise 0.
*/

static int try_auxlist_term (CMD *cmd, cmd_token *tok, int scount)
{
    int supported = (cmd->ci == ARMA || cmd->ci == DPANEL ||
                     cmd->ci == VAR);

    if (!supported || scount > 0) {
        /* wrong command, or we're past the first sublist */
        return 0;
    }

    if (tok->type == TOK_CBSTR) {
        return 1;
    }

    /* otherwise not a regular "int list" term only if it
       names a matrix */
    return tok->type == TOK_NAME && get_matrix_by_name(tok->s) != NULL;
}
2702 
check_arma_ilist(const int * ilist,const char * vtest)2703 static int check_arma_ilist (const int *ilist,
2704 			     const char *vtest)
2705 {
2706     if (ilist != NULL && ilist[0] == 3) {
2707 	/* got all 3 ARIMA fields: p, d, q */
2708 	if (vtest[1] != 0) {
2709 	    /* we got a vector term for 'd' */
2710 	    return E_PARSE;
2711 	}
2712     }
2713 
2714     return 0;
2715 }
2716 
/* Returns 1 if the command is DPANEL and @s is one of its special
   terms, "GMM" or "GMMlevel"; otherwise returns 0.
*/

static int panel_gmm_special (CMD *cmd, const char *s)
{
    return cmd->ci == DPANEL &&
        (strcmp(s, "GMM") == 0 || strcmp(s, "GMMlevel") == 0);
}
2727 
/* Returns 1 if the command is MIDASREG and @s is one of its
   special terms, "mds" or "mdsl"; otherwise returns 0.
*/

static int midas_term_special (CMD *cmd, const char *s)
{
    return cmd->ci == MIDASREG &&
        (strcmp(s, "mds") == 0 || strcmp(s, "mdsl") == 0);
}
2738 
/* Reassemble a run of joined tokens, starting at index @k1, into a
   single term appended to the list string @lstr. @j is the number
   of terms already added: if positive, a space is written first.
   The run ends at the first subsequent token that is not joined
   to its predecessor or is already done. Parenthesized and
   bracketed tokens get their delimiters restored. The special
   dpanel and midasreg terms are not added to @lstr: along with
   the following token they are appended to @c->param in the form
   "name(args)". On return @k2 holds the index of the last token
   consumed, and each token consumed is marked as done.
*/

static void rejoin_list_toks (CMD *c, int k1, int *k2,
                              char *lstr, int j)
{
    int i;

    if (j > 0) {
        strcat(lstr, " ");
    }

    for (i=k1; i<c->ntoks; i++) {
        cmd_token *tok = &c->toks[i];
        int has_next = (i < c->ntoks - 1);

        if (i > k1 && (!token_joined(tok) || token_done(tok))) {
            break;
        }

        *k2 = i;

        if (tok->type == TOK_PRSTR) {
            /* lag spec, for example */
            sprintf(lstr + strlen(lstr), "(%s)", tok->s);
        } else if (tok->type == TOK_BRSTR) {
            /* element of named list, for example */
            sprintf(lstr + strlen(lstr), "[%s]", tok->s);
        } else if (has_next && (panel_gmm_special(c, tok->s) ||
                                midas_term_special(c, tok->s))) {
            /* special term: goes into param, with its argument */
            cmd_token *arg = &c->toks[i+1];
            char *term;

            term = gretl_strdup_printf("%s(%s)", tok->s, arg->s);
            c->param = gretl_str_expand(&c->param, term, " ");
            free(term);
            mark_list_token_done(c->toks[i]);
            /* step on to the argument token, marked below */
            i++;
            *k2 = i;
        } else {
            strcat(lstr, tok->s);
        }

        mark_list_token_done(c->toks[i]);
    }
}
2789 
/* Screen a token for use in a regular command list. In a "genr"
   context a scalar or matrix might be accepted as representing
   one or more integer series IDs, but that is not acceptable in
   the regular command context. So a name is rejected if it
   identifies a "uservar" of any type other than list. (Series are
   not included in this, and are of course OK.) Tokens that are
   not names always pass. Returns 1 if acceptable, 0 if not.
*/

static int validate_list_token (cmd_token *tok)
{
    GretlType t;

    if (tok->type != TOK_NAME) {
        return 1;
    }

    t = user_var_get_type_by_name(tok->s);

    return (t == GRETL_TYPE_NONE || t == GRETL_TYPE_LIST);
}
2807 
2808 /* In case we got something that looks like a list but
2809    turned out not to be valid as such, remove the TOK_DONE
2810    flag from the tokens that composed the putative list,
2811    which will be marked with TOK_LSTR status.
2812 */
2813 
rescind_tok_done_status(CMD * c)2814 static void rescind_tok_done_status (CMD *c)
2815 {
2816     cmd_token *tok;
2817     int i;
2818 
2819     for (i=c->cstart+1; i<c->ntoks; i++) {
2820 	tok = &c->toks[i];
2821 	if (tok->flag & TOK_LSTR) {
2822 	    tok->flag ^= TOK_DONE;
2823 	    tok->flag ^= TOK_LSTR;
2824 	}
2825     }
2826 }
2827 
/* Assemble the list for a command that takes one.

   The tokens after the command word are scanned. Where integers
   are wanted (CI_L1INT, or following the separator for MPOLS) they
   go into an integer list, with vector-style terms diverted to
   @c->auxlist; other tokens are concatenated into a "list string"
   which, if @dset is non-NULL, is turned into a list of series via
   generate_list(). The pieces are then combined into @c->list.

   For DELEET and PRINT, an empty list string or a failure of list
   generation is not an error: the command is switched to ad hoc
   parsing (CI_ADHOC) on the supposition that the terms are names
   of non-series variables.

   Returns @c->err.
*/

static int process_command_list (CMD *c, DATASET *dset)
{
    guint8 TOK_PROV = (TOK_DONE | TOK_LSTR);
    char vectest[3] = {0}; /* for arima */
    char lstr[MAXLINE];
    cmd_token *tok;
    int *ilist = NULL;
    int *vlist = NULL;
    int want_ints = 0;
    int scount = 0;
    int i, j, k, ns;

    if (c->ciflags & CI_L1INT) {
        want_ints = 1;
    }

    ns = cmd_get_sepcount(c);

    *lstr = '\0';
    j = 0; /* number of terms added to @lstr */

    for (i=c->cstart+1; i<c->ntoks && !c->err; i++) {
        tok = &c->toks[i];
        if (tok->type == TOK_SEMIC) {
            scount++;
            if (c->ci == LAGS && scount == 1) {
                /* the separator that follows the optional number
                   of lags to create */
                tok->flag |= TOK_DONE;
            } else if (c->ci == ARMA && ns == 2) {
                if (scount == 1) {
                    gretl_list_append_term(&ilist, LISTSEP);
                    tok->flag |= TOK_DONE;
                } else if (scount == 2) {
                    want_ints = 0;
                    tok->flag |= TOK_DONE;
                }
            } else if (want_ints) {
                want_ints = 0;
                tok->flag |= TOK_DONE;
            } else if (c->ci == MPOLS) {
                want_ints = 1;
                tok->flag |= TOK_DONE;
            }
        }
        if (!token_done(tok)) {
            if (bracketed_type(tok->type) && c->ci != PRINT) {
                gretl_errmsg_sprintf(_("Parse error at unexpected token '%s'"),
                                     tok->type == TOK_PRSTR ? "(" : "[");
                c->err = E_PARSE;
            } else if (want_ints) {
                if (try_auxlist_term(c, tok, scount)) {
                    /* a vector-style entry */
                    c->err = process_auxlist_term(c, tok, &ilist);
                    if (!c->err) {
                        /* record which int-list position it fills */
                        vectest[ilist[0] - 1] = 1;
                        tok->flag |= TOK_PROV;
                    }
                } else {
                    k = gretl_int_from_string(tok->s, &c->err);
                    if (!c->err) {
                        gretl_list_append_term(&ilist, k);
                        tok->flag |= TOK_PROV;
                    }
                }
                if (c->err) {
                    gretl_errmsg_sprintf(_("Parse error at unexpected token '%s'"),
                                         tok->s);
                }
            } else if (next_joined_token(c, i) != NULL) {
                /* note: advances @i past the tokens consumed */
                rejoin_list_toks(c, i, &i, lstr, j++);
            } else {
                if (validate_list_token(tok)) {
                    if (j > 0) {
                        strcat(lstr, " ");
                    }
                    strcat(lstr, tok->s);
                    tok->flag |= TOK_PROV;
                    j++;
                }
            }
        }
        /* stop once a fixed-length list is complete */
        if (j == 1 && (c->ciflags & CI_LLEN1)) {
            break;
        } else if (j == 2 && (c->ciflags & CI_LLEN2)) {
            break;
        }
    }

    if (!c->err) {
        c->err = check_list_sepcount(c->ci, ns);
    }

    if (!c->err && c->ci == ARMA) {
        c->err = check_arma_ilist(ilist, vectest);
    }

    if (!c->err && *lstr != '\0') {
        tailstrip(lstr);
        if (c->ci == DPANEL || c->ci == MIDASREG) {
            /* We may have a ';' separator that's not followed
               by any regular second list, just special terms; so
               don't error out on a trailing ';' in defining a
               list.

               The list string may hold nothing but the spaces
               written ahead of special terms, in which case it
               is presumably empty after tailstrip(): so check
               the length before looking at the last character.
            */
            size_t len = strlen(lstr);

            if (len > 0 && lstr[len-1] == ';') {
                lstr[len-1] = '\0';
            }
        }
    }

#if CDEBUG > 1
    fprintf(stderr, "process_command_list: lstr='%s' (err=%d)\n", lstr, c->err);
#endif

    if ((c->ci == DELEET || c->ci == PRINT) && *lstr == '\0') {
        /* we didn't get a "list string": maybe the terms are
           names of non-series variables
        */
        c->ciflags &= ~CI_LIST;
        c->ciflags &= ~CI_DOALL;
        c->ciflags |= CI_ADHOC;
        c->err = 0;
    }

    if (!c->err && dset != NULL && *lstr != '\0') {
        vlist = generate_list(lstr, dset, &c->err);
        if (c->err && (c->ci == DELEET || c->ci == PRINT)) {
            /* we got something that looked like a list string,
               but list generation failed: again, maybe the
               terms are names of non-series variables
            */
            rescind_tok_done_status(c);
            c->ciflags &= ~CI_LIST;
            c->ciflags &= ~CI_DOALL;
            c->ciflags |= CI_ADHOC;
            c->err = 0;
            goto finish;
        }
    }

    if (!c->err) {
        if (c->ci == MPOLS && vlist != NULL && ilist != NULL) {
            /* legacy mpols special: series first, then ints */
            c->list = gretl_lists_join_with_separator(vlist, ilist);
            if (c->list == NULL) {
                c->err = E_ALLOC;
            }
        } else if (vlist != NULL) {
            if (ilist != NULL) {
                c->list = gretl_lists_join_with_separator(ilist, vlist);
                if (c->list == NULL) {
                    c->err = E_ALLOC;
                }
            } else {
                /* hand over @vlist itself */
                c->list = vlist;
                vlist = NULL;
            }
        } else if (ilist != NULL) {
            /* a "pure" ints list */
            c->list = ilist;
            ilist = NULL;
        }
    }

 finish:

    free(ilist);
    free(vlist);

#if CDEBUG > 1
    fprintf(stderr, "process_command_list: returning err = %d\n", c->err);
#endif

    return c->err;
}
3004 
/* Gather into a single string the portion of the command line that
   needs special-purpose ("ad hoc") parsing: all tokens after the
   command word that are not yet marked as done, with quotes and
   delimiters restored and spaces placed between tokens that are
   not joined, followed by any text at @c->vstart. For FUNC the
   string starts with "function ". The result is stored in
   @c->param, or in @c->parm2 in the case of PRINT. If @c->vstart
   holds nothing but spaces it is reset to NULL. Returns 0 on
   success or the error code from creation of the print buffer.
*/

static int handle_adhoc_string (CMD *c)
{
    PRN *prn;
    int nwritten = 0;
    int err = 0;
    int k;

    prn = gretl_print_new(GRETL_PRINT_BUFFER, &err);
    if (err) {
        return err;
    }

    if (c->ci == FUNC) {
        pputs(prn, "function ");
    }

    for (k=c->cstart+1; k<c->ntoks; k++) {
        cmd_token *tok = &c->toks[k];
        const char *open = NULL;
        const char *close = NULL;

        /* 2019-02-27: was !option_type(tok->type) below */
        if (token_done(tok)) {
            continue;
        }

        if (nwritten > 0 && !token_joined(tok)) {
            pputc(prn, ' ');
        }

        /* work out the delimiters to put back, if any */
        if (tok->flag & TOK_QUOTED) {
            open = close = "\"";
        } else if (tok->type == TOK_PRSTR) {
            open = "(";
            close = ")";
        } else if (tok->type == TOK_CBSTR) {
            open = "{";
            close = "}";
        } else if (tok->type == TOK_BRSTR) {
            open = "[";
            close = "]";
        }

        if (open != NULL) {
            pputs(prn, open);
        }
        pputs(prn, tok->s);
        if (close != NULL) {
            pputs(prn, close);
        }

        nwritten++;
    }

    if (c->vstart != NULL) {
        const char *rest = c->vstart + strspn(c->vstart, " ");

        if (*rest == '\0') {
            c->vstart = NULL;
        } else {
            if (nwritten > 0) {
                pputc(prn, ' ');
            }
            pputs(prn, rest);
        }
    }

    /* In general, any ad hoc portion of a command should
       be stuffed into its @param member. However, with
       PRINT, @param is reserved for a string literal so
       we use @parm2 instead.
    */

    if (c->ci == PRINT) {
        c->parm2 = gretl_print_steal_buffer(prn);
    } else {
        c->param = gretl_print_steal_buffer(prn);
    }

    gretl_print_destroy(prn);

    return err;
}
3074 
/* Returns the number of tokens in @c, not counting those that
   pertain to options (option flags, their leading dashes, the
   '=' that introduces an option value, and option values). If
   no option is set for the command the full token count is
   returned without inspection.
*/

static int n_regular_tokens (CMD *c)
{
    int n_opt = 0;
    int k;

    if (!c->opt) {
        return c->ntoks;
    }

    for (k=0; k<c->ntoks; k++) {
        switch (c->toks[k].type) {
        case TOK_OPT:
        case TOK_SOPT:
        case TOK_OPTDASH:
        case TOK_OPTEQ:
        case TOK_OPTVAL:
            n_opt++;
            break;
        default:
            break;
        }
    }

    return c->ntoks - n_opt;
}
3093 
3094 #if SEMIC_TEST
3095 
/* Find the first "free" semicolon in @s, meaning one that is
   neither inside double quotes nor at a non-zero brace depth.
   Returns a pointer to it, or NULL if there is none. Quote state
   is toggled at every '"', with no handling of escapes; the brace
   depth is a plain counter that goes up on '{' and down on '}'.
*/

static char *free_semicolon (char *s)
{
    int in_quotes = 0;
    int depth = 0;
    char *p;

    for (p = s; *p != '\0'; p++) {
        switch (*p) {
        case '"':
            in_quotes = !in_quotes;
            break;
        case '{':
            depth++;
            break;
        case '}':
            depth--;
            break;
        case ';':
            if (!in_quotes && depth == 0) {
                return p;
            }
            break;
        default:
            break;
        }
    }

    return NULL;
}
3118 
3119 #endif
3120 
/* Deal with the special "extra" material that certain commands
   accept, taken from tokens after the command word that are not
   yet marked as done:

   - GNUPLOT, BXPLOT, HFPLOT: a brace-delimited token is copied
     into @c->param (if there is more than one, the last wins).
   - MODPRINT: given three regular arguments, a name in last
     place is pushed as the parameter to OPT_A.
   - STORE: names are accumulated in @c->parm2.
   - DATAMOD: names and integers are accumulated in @c->parm2.
   - JOIN: names, and tokens identified by wildsym(), are
     accumulated in @c->parm2.

   Returns @c->err.
*/

static int handle_command_extra (CMD *c)
{
    cmd_token *tok;
    int i;

    if (c->ci == GNUPLOT || c->ci == BXPLOT || c->ci == HFPLOT) {
        /* if present, 'extra' goes into param */
        for (i=c->cstart+1; i<c->ntoks; i++) {
            tok = &c->toks[i];
            if (!token_done(tok) && tok->type == TOK_CBSTR) {
                /* catch stuff in braces. We store a copy, not
                   the token's own string: elsewhere @param is
                   given a heap string of its own, so it must
                   not alias token storage.
                */
                char *extra = gretl_strdup(tok->s);

                tok->flag |= TOK_DONE;
                if (extra == NULL) {
                    c->err = E_ALLOC;
                    break;
                }
                /* assumes @param, if set, owns its string, as
                   all assignments visible here suggest */
                free(c->param);
                c->param = extra;
            }
        }
    } else if (c->ci == MODPRINT) {
        /* if present, 'extra' gets pushed as an option */
        int regtoks = n_regular_tokens(c);

        if (regtoks - c->cstart - 1 == 3) {
            /* got three arguments */
            char *extra;

            tok = &c->toks[regtoks - 1];
            if (!token_done(tok) && tok->type == TOK_NAME) {
                tok->flag |= TOK_DONE;
                extra = gretl_strdup(tok->s);
                if (extra == NULL) {
                    c->err = E_ALLOC;
                } else {
                    c->opt |= OPT_A;
                    c->err = push_option_param(c->ci, OPT_A, extra);
                }
            }
        }
    } else if (c->ci == STORE) {
        /* progressive loop: 'extra' goes into parm2 */
        for (i=c->cstart+1; i<c->ntoks; i++) {
            tok = &c->toks[i];
            if (!token_done(tok) && tok->type == TOK_NAME) {
                tok->flag |= TOK_DONE;
                c->parm2 = gretl_str_expand(&c->parm2, tok->s, " ");
            }
        }
    } else if (c->ci == DATAMOD) {
        /* "datamod compact": 'extra' into parm2 */
        for (i=c->cstart+1; i<c->ntoks; i++) {
            tok = &c->toks[i];
            if (!token_done(tok) &&
                (tok->type == TOK_NAME || tok->type == TOK_INT)) {
                tok->flag |= TOK_DONE;
                c->parm2 = gretl_str_expand(&c->parm2, tok->s, " ");
            }
        }
    } else if (c->ci == JOIN) {
        /* join: allow for multiple import names */
        for (i=c->cstart+1; i<c->ntoks; i++) {
            tok = &c->toks[i];
            if (!token_done(tok) && tok->type == TOK_NAME) {
                tok->flag |= TOK_DONE;
                c->parm2 = gretl_str_expand(&c->parm2, tok->s, " ");
            } else if (!token_done(tok) && wildsym(tok)) {
                /* appended with an empty separator */
                tok->flag |= TOK_DONE;
                c->parm2 = gretl_str_expand(&c->parm2, tok->s, "");
            }
        }
    }

    return c->err;
}
3191 
3192 /* @vstart is a const pointer into the incoming command
3193    line, holding a "genr"-type expression, a string to
3194    be passed to the shell, or a varargs expression.
3195 */
3196 
/* Determine where the free-form tail of the command line begins
   and record it in cmd->vstart. For a CI_EXPR command this is the
   "genr"-type expression (with or without the leading command
   word, depending on the command and the current context); for a
   CI_VARGS command it is whatever follows the last token parsed
   so far. cmd->vstart is set to NULL if nothing but white space
   remains.

   In a list-generating context a ';' in the tail is rejected: an
   explanatory note is written to @prn (if non-NULL) and E_INVARG
   is returned. Otherwise the return value is 0.

   @state is consulted only when SEMIC_TEST is enabled.
*/

static int set_command_vstart (CMD *cmd, ExecState *state,
			       PRN *prn)
{
    cmd_token *tok;
    const char *s = NULL;

    if (cmd->ciflags & CI_EXPR) {
	tok = &cmd->toks[cmd->cstart];
	s = tok->lp;
	if (!cmd->context && expr_keep_cmdword(cmd->ci)) {
	    ; /* leave it alone */
	} else if (cmd->ci == EVAL) {
	    cmd->gtype = GRETL_TYPE_NONE;
	    s += 4; /* step over the 4 bytes of the command word */
	} else if (cmd->ci == GENR && cmd->gtype != GRETL_TYPE_ANY) {
	    /* skip initial command word */
	    s += strlen(tok->s);
	} else if (cmd->ci != GENR && cmd->ci != cmd->context) {
	    /* skip initial command word */
	    s += strlen(tok->s);
	}
    } else if (cmd->ciflags & CI_VARGS) {
	/* vstart should point beyond the last token */
	tok = &cmd->toks[cmd->ntoks-1];
	s = tok->lp + real_toklen(tok);
    }

    if (s != NULL) {
	/* trim leading white space; an empty tail counts as absent */
	s += strspn(s, " \t");
	if (*s == '\0') {
	    s = NULL;
	}
    }

    /* @s may be NULL at this point (empty tail), in which case there
       is nothing to scan and strchr() must not be called
    */
    if (s != NULL &&
	(cmd->gtype == GRETL_TYPE_LIST || (cmd->ciflags & CI_LGEN))) {
	/* we won't accept ';' as list separator outside of an
	   appropriate command context. This breaks the old version
	   of the system "equations" mechanism but I'm afraid that's
	   just too bad.
	*/
	if (strchr(s, ';')) {
	    if (prn != NULL) {
		pputc(prn, '\n');
		pputs(prn, "If you are trying to assemble a compound list for use in a "
		      "\"system\" block,\nplease see section 31.2 of the Gretl User's "
		      "Guide for the current method.\n");
		pputc(prn, '\n');
	    }
	    gretl_errmsg_sprintf(_("The symbol '%c' is not valid in this context\n"),
				 ';');
	    return E_INVARG;
	}
    }

#if SEMIC_TEST
    if (state != NULL && s != NULL && strchr(s, ';')) {
	char *p = free_semicolon((char *) s);

	if (p != NULL) {
	    /* cut the line at the ';' and remember the remainder */
	    *p = '\0';
	    p++;
	    state->more = p + strspn(p, " \t");
# if 0
	    fprintf(stderr, "SEMIC_TEST: s->more = '%s'\n", state->more);
# endif
	}
    }
#endif

    cmd->vstart = s;

    return 0;
}
3270 
3271 /* For a command that ends with varargs, do we have the required
3272    leading non-vararg parameter(s)? (This means either one or two
3273    parameters, followed by a comma.) If so, we can stop parsing
3274    and designate the remainder of the command line as the
3275    varargs portion.
3276 */
3277 
got_param_tokens(CMD * cmd)3278 static int got_param_tokens (CMD *cmd)
3279 {
3280     int mintoks = (cmd->ciflags & CI_PARM2)? 3 : 2;
3281     int i, n = cmd->ntoks - cmd->cstart - 1;
3282 
3283     for (i=cmd->cstart+1; i<cmd->ntoks; i++) {
3284 	if (cmd->toks[i].flag & TOK_JOINED) {
3285 	    /* avoid over-counting */
3286 	    n--;
3287 	}
3288     }
3289 
3290     if (cmd->ci == PRINTF || cmd->ci == SPRINTF) {
3291 	i = cmd->cstart + 2;
3292 	if (i < cmd->ntoks && cmd->toks[i].type == TOK_COMMA) {
3293 	    if (cmd->ci == PRINTF) {
3294 		mark_token_done(cmd->toks[i]);
3295 	    } else {
3296 		/* sprintf: redundant comma after varname */
3297 		mark_token_ignored(cmd->toks[i]);
3298 		mintoks++;
3299 	    }
3300 	}
3301     }
3302 
3303     return n == mintoks;
3304 }
3305 
/* Validate the parameter given to "end". "end outfile" is handled
   as an alias: the command is converted to OUTFILE with OPT_C and
   0 is returned at once. Otherwise the parameter must name the
   command recorded in cmd->context, failing which an error message
   is set and cmd->err becomes E_DATA. In the non-alias case the
   context is cleared regardless. Returns cmd->err.
*/

static int check_end_command (CMD *cmd)
{
    const int target = gretl_command_number(cmd->param);

    if (target != OUTFILE) {
	if (target != cmd->context) {
	    gretl_errmsg_sprintf(_("end: invalid parameter '%s'"), cmd->param);
	    cmd->err = E_DATA;
	}
	/* on "end", scrub the context */
	cmd->context = 0;
	return cmd->err;
    }

    /* special case, alias */
    cmd->ci = OUTFILE;
    cmd->opt = OPT_C;

    return 0;
}
3327 
3328 /* For a command that (usually) requires a list, check
3329    that we got one, and if so, check that it doesn't
3330    contain duplicates.
3331 */
3332 
check_for_list(CMD * cmd)3333 static int check_for_list (CMD *cmd)
3334 {
3335     if (cmd->list == NULL) {
3336 	if (cmd->ciflags & CI_DOALL) {
3337 	    ; /* list defaults to all series, OK */
3338 	} else if (cmd->ci == OMIT && (cmd->opt & OPT_A)) {
3339 	    ; /* the auto-omit option, OK */
3340 	} else if ((cmd->ci == FREQ || cmd->ci == BDS) && (cmd->opt & OPT_X)) {
3341 	    ; /* using a matrix: may be OK */
3342 	} else {
3343 	    fprintf(stderr, "check_for_list: cmd->list is NULL\n");
3344 	    cmd->err = E_ARGS;
3345 	}
3346     } else {
3347 	/* check for duplicated variables */
3348 	int dupv = gretl_list_duplicates(cmd->list, cmd->ci);
3349 
3350 	if (dupv >= 0) {
3351 	    printlist(cmd->list, "command with duplicate(s)");
3352 	    cmd->err = E_DATA;
3353 	    gretl_errmsg_sprintf(_("variable %d duplicated in the "
3354 				   "command list."), dupv);
3355 	}
3356     }
3357 
3358     return cmd->err;
3359 }
3360 
3361 /* @cmd has the CI_LCHK (ambiguity) flag set: see if
3362    we're able to disambiguate by this point
3363 */
3364 
scrub_list_check(CMD * cmd)3365 static int scrub_list_check (CMD *cmd)
3366 {
3367     int maxtoks = cmd->toks[0].type == TOK_CATCH ? 4 : 3;
3368     int ret = 0;
3369 
3370     if (cmd->ntoks == maxtoks) {
3371 	const char *s = cmd->toks[maxtoks-1].s;
3372 	int ci = gretl_command_number(s);
3373 
3374 	if (ci == DELEET || ci == PRINT) {
3375 	    cmd->toks[maxtoks-1].flag |= TOK_DONE;
3376 	    cmd->ci = ci;
3377 	    cmd->opt = OPT_L;
3378 	    cmd->ciflags = CI_PARM1;
3379 	    ret = 1;
3380 	} else {
3381 	    cmd->ciflags ^= CI_LCHK;
3382 	    ret = 1;
3383 	}
3384     }
3385 
3386     return ret;
3387 }
3388 
unexpected_symbol_error(char c)3389 static int unexpected_symbol_error (char c)
3390 {
3391     if (c == '\'') {
3392 	gretl_errmsg_sprintf(_("Unexpected symbol %c"), c);
3393     } else {
3394 	gretl_errmsg_sprintf(_("Unexpected symbol '%c'"), c);
3395     }
3396     return E_PARSE;
3397 }
3398 
utf8_fail(char * s)3399 static int utf8_fail (char *s)
3400 {
3401     if (!g_utf8_validate(s, -1, NULL)) {
3402 	gretl_errmsg_set(_("Command line is not valid UTF-8"));
3403 	return E_DATA;
3404     } else {
3405 	return 0;
3406     }
3407 }
3408 
/* Returns 1 if @c is in the range A-Z (0x41 to 0x5A) or a-z
   (0x61 to 0x7A), otherwise 0. The test is on the numeric value
   alone, so it does not depend on the locale.
*/

static int is_ascii_alpha (int c)
{
    if (c < 0x41 || c > 0x7A) {
	/* outside the span from 'A' to 'z' */
	return 0;
    }

    /* exclude the six non-letters lying between 'Z' and 'a' */
    return c <= 0x5A || c >= 0x61;
}
3416 
/* is @c a character that may introduce a numeric token (decimal
   point or sign)? Note that @c is evaluated more than once. */
#define MAY_START_NUMBER(c) ((c) == '.' || (c) == '-' || (c) == '+')
3418 
3419 /* tokenize_line: parse @line into a set of tokens on a
3420    lexical basis. In some cases constitution of command
3421    arguments will require compositing tokens. We get a
3422    little semantic help from the CI_FNAME flag: if this
3423    is present for a given command, that tells us to
3424    consider a filename containing directory separators
3425    as a unitary token.
3426 */
3427 
tokenize_line(ExecState * state,DATASET * dset,int compmode)3428 static int tokenize_line (ExecState *state, DATASET *dset,
3429 			  int compmode)
3430 {
3431     char tok[FN_NAMELEN];
3432     char *s = state->line;
3433     CMD *cmd = state->cmd;
3434     char *vtok;
3435     int n, m, pos = 0;
3436     int wild_ok = 0;
3437     int at_ok = compmode;
3438     int want_fname = 0;
3439     int err = 0;
3440 
3441 #if CDEBUG || TDEBUG
3442     fprintf(stderr, "*** %s: line = '%s'\n",
3443 	    compmode ? "get_command_index" : "parse_command_line", s);
3444     fprintf(stderr, " first byte %0x\n", (unsigned char) s[0]);
3445 #endif
3446 
3447     if (utf8_fail(s)) {
3448 	return E_DATA;
3449     }
3450 
3451     gretl_push_c_numeric_locale();
3452     state->more = NULL;
3453 
3454     if (!gretl_in_batch_mode() && *s == '=') {
3455 	/* treat as a bare expression to be evaluated */
3456 	cmd->ci = EVAL;
3457 	cmd->gtype = GRETL_TYPE_NONE;
3458 	cmd->vstart = s + 1;
3459 	goto skipit;
3460     }
3461 
3462     while (!err && *s) {
3463 	int skipped = 0;
3464 
3465 	*tok = '\0';
3466 
3467 	if (*s == '-') {
3468 	    want_fname = 0;
3469 	}
3470 
3471 	if (*s == '#') {
3472 	    break;
3473 	} else if (want_fname && *s != '"' && !isspace(*s)) {
3474 	    n = strcspn(s, " \t");
3475 	    if (n < FN_NAMELEN) {
3476 		strncat(tok, s, n);
3477 		err = push_string_token(cmd, tok, s, pos);
3478 	    } else {
3479 		vtok = gretl_strndup(s, n);
3480 		if (vtok == NULL) {
3481 		    err = E_ALLOC;
3482 		} else {
3483 		    err = push_string_token(cmd, vtok, s, pos);
3484 		    free(vtok);
3485 		}
3486 	    }
3487 	} else if (wild_ok && (is_ascii_alpha((int) *s) || *s == '*')) {
3488 	    n = 1 + wild_spn(s+1);
3489 	    m = (n < FN_NAMELEN)? n : FN_NAMELEN - 1;
3490 	    strncat(tok, s, m);
3491 	    err = push_string_token(cmd, tok, s, pos);
3492 	} else if (is_ascii_alpha((int) *s) || *s == '$' || (at_ok && *s == '@')) {
3493 	    /* regular or accessor identifier */
3494 	    if (*s == '@' && !compmode) {
3495 		fprintf(stderr, "tokenize: found '@':\n '%s'\n", state->line);
3496 	    }
3497 	    n = 1 + namechar_spn(s+1);
3498 	    m = (n < FN_NAMELEN)? n : FN_NAMELEN - 1;
3499 	    strncat(tok, s, m);
3500 	    err = push_string_token(cmd, tok, s, pos);
3501 	} else if (is_greek_letter(s)) {
3502 	    n = 2;
3503 	    strncat(tok, s, n);
3504 	    err = push_string_token(cmd, tok, s, pos);
3505 	} else if (ldelim(*s)) {
3506 	    /* left-hand delimiter that needs to be paired */
3507 	    n = closing_delimiter_pos(s);
3508 	    if (n < 0) {
3509 		gretl_errmsg_sprintf(_("Unmatched '%c'\n"), *s);
3510 		err = E_PARSE;
3511 	    } else if (n < FN_NAMELEN) {
3512 		strncat(tok, s+1, n);
3513 		err = push_delimited_token(cmd, tok, s, pos);
3514 	    } else {
3515 		vtok = gretl_strndup(s+1, n);
3516 		if (vtok == NULL) {
3517 		    err = E_ALLOC;
3518 		} else {
3519 		    err = push_delimited_token(cmd, vtok, s, pos);
3520 		    free(vtok);
3521 		}
3522 	    }
3523 	    n += 2;
3524 	} else if (*s == '"') {
3525 	    n = closing_quote_pos(s, cmd->ci);
3526 	    if (n < 0) {
3527 		gretl_errmsg_sprintf(_("Unmatched '%c'\n"), '"');
3528 		err = E_PARSE;
3529 	    } else {
3530 		err = push_quoted_token(cmd, s, n, pos);
3531 	    }
3532 	    n += 2;
3533 	} else if ((n = symbol_spn(s)) > 0) {
3534 	    if (n == 1 && MAY_START_NUMBER(*s) && isdigit(*(s+1))) {
3535 		n = numeric_spn(s, 0);
3536 		if (n == 0) {
3537 		    err = unexpected_symbol_error(*s);
3538 		} else {
3539 		    m = (n < FN_NAMELEN)? n : FN_NAMELEN - 1;
3540 		    strncat(tok, s, m);
3541 		    err = push_numeric_token(cmd, tok, s, pos);
3542 		}
3543 	    } else {
3544 		/* operator / symbol */
3545 		m = (n < FN_NAMELEN)? n : FN_NAMELEN - 1;
3546 		strncat(tok, s, m);
3547 		err = push_symbol_token(cmd, tok, s, pos);
3548 	    }
3549 	} else if (isdigit(*s)) {
3550 	    /* numeric string */
3551 	    n = numeric_spn(s, 1);
3552 	    m = (n < FN_NAMELEN)? n : FN_NAMELEN - 1;
3553 	    strncat(tok, s, m);
3554 	    err = push_numeric_token(cmd, tok, s, pos);
3555 	} else if (isspace(*s) || *s == (char) 0xA0) {
3556 	    /* handle stupid "non-breaking space" here too */
3557 	    n = 1;
3558 	    skipped = 1;
3559 	} else if (*s == '@' && (compmode || gretl_if_state_false())) {
3560 	    /* string substitution not yet done */
3561 	    n = 1;
3562 	    skipped = 1;
3563 	} else if (*s == '_') {
3564 	    /* unassigned call to a hidden function? */
3565 	    n = 1 + namechar_spn(s+1);
3566 	    m = (n < FN_NAMELEN)? n : FN_NAMELEN - 1;
3567 	    strncat(tok, s, m);
3568 	    err = push_string_token(cmd, tok, s, pos);
3569 	} else {
3570 	    err = unexpected_symbol_error(*s);
3571 	}
3572 
3573 	if (err) {
3574 	    break;
3575 	}
3576 
3577 	if (!skipped && want_fname) {
3578 	    want_fname = 0;
3579 	}
3580 
3581 	if (!skipped && cmd->ci == 0 && cmd->ntoks > 0) {
3582 	    /* use current info to determine command index? */
3583 	    int imin = min_token_index(cmd, compmode);
3584 
3585 	    if (cmd->ntoks > imin) {
3586 		try_for_command_index(cmd, imin, dset, compmode, &err);
3587 #if TDEBUG
3588 		if (cmd->ci > 0) {
3589 		    fprintf(stderr, "ntoks=%d, imin=%d, ci=%d (%s)\n",
3590 			    cmd->ntoks, imin, cmd->ci, gretl_command_word(cmd->ci));
3591 		} else {
3592 		    fprintf(stderr, "ntoks=%d, imin=%d, ci not yet known\n",
3593 			    cmd->ntoks, imin);
3594 		}
3595 #endif
3596 		if (cmd->ci == PRINT && peek_next_char(cmd, imin) != '"') {
3597 		    cmd->ciflags |= CI_LIST;
3598 		}
3599 		if (cmd->ciflags & CI_FNAME) {
3600 		    want_fname = 1;
3601 		}
3602 		if (cmd->ci == LOOP && compmode == LOOP) {
3603 		    /* we need to pick up the specifics of the loop */
3604 		    compmode = 0;
3605 		}
3606 	    }
3607 	}
3608 
3609 	/* when we're just looking for a command index (compmode),
3610 	   we may be able to get out early
3611 	*/
3612 	if (compmode && simple_flow_control(cmd)) {
3613 	    if (string_is_blank(s)) {
3614 		break;
3615 	    }
3616 	} else if (compmode && (cmd->ci > 0 || cmd->ntoks == 3)) {
3617 	    /* either we've got the command index or it seems
3618 	       we're not going to get it */
3619 	    break;
3620 	}
3621 
3622 	if (cmd->ciflags & CI_LCHK) {
3623 	    /* handle ambiguity of "list ..." */
3624 	    wild_ok = !scrub_list_check(cmd);
3625 	}
3626 
3627 	if (cmd->ci == DELEET || (cmd->ciflags & CI_LIST)) {
3628 	    /* flag acceptance of wildcard expressions */
3629 	    wild_ok = 1;
3630 	} else if ((cmd->ciflags & CI_EXPR) && !(cmd->ciflags & CI_LCHK)) {
3631 	    /* the remainder of line will be parsed elsewhere */
3632 	    break;
3633 	} else if ((cmd->ciflags & CI_ADHOC) && (cmd->ciflags & CI_NOOPT)) {
3634 	    /* ditto */
3635 	    cmd->vstart = s + n;
3636 	    break;
3637 	}
3638 
3639 	if ((cmd->ciflags & CI_VARGS) && got_param_tokens(cmd)) {
3640 	    /* remaining args will be parsed elsewhere */
3641 	    break;
3642 	}
3643 
3644 	s += n;
3645 	pos += n;
3646     }
3647 
3648  skipit:
3649 
3650     gretl_pop_c_numeric_locale();
3651 
3652 #if CDEBUG
3653     if (err) {
3654 	fprintf(stderr, "tokenize_line: err = %d\n", err);
3655     }
3656 #endif
3657 
3658     return err;
3659 }
3660 
/* For use with spreadsheet option params: returns 1 if @s
   passes integer_string() and its value is in the range 1 to
   10 inclusive, otherwise 0.
*/

static int small_positive_int (const char *s)
{
    int val;

    if (!integer_string(s)) {
	return 0;
    }

    val = atoi(s);

    return val > 0 && val <= 10;
}
3675 
3676 /* The following is kind of a curiosity -- translation from
3677    option parameters to a special list, for handling
3678    spreadsheet-specific options for "open" or "append".
3679    For for the moment I'm going to leave it close to what
3680    was in the "old" (pre-tokenize) interact.c. It can be
3681    revisited later. AC, 2014-08-30
3682 */
3683 
post_process_spreadsheet_options(CMD * cmd)3684 static int post_process_spreadsheet_options (CMD *cmd)
3685 {
3686     int err = 0;
3687 
3688     if (cmd->opt & OPT_O) {
3689 	/* odbc: spreadsheet-specific options not acceptable */
3690 	err = incompatible_options(cmd->opt, OPT_O | OPT_C |
3691 				   OPT_R | OPT_S);
3692     } else if (cmd->opt & OPT_W) {
3693 	/* web database: ditto */
3694 	err = incompatible_options(cmd->opt, OPT_W | OPT_C |
3695 				   OPT_R | OPT_S);
3696     }
3697 
3698     if (!err) {
3699 	err = incompatible_options(cmd->opt, OPT_O | OPT_W);
3700     }
3701 
3702     if (!err && (cmd->opt & (OPT_R | OPT_C | OPT_S))) {
3703 	/* row offset, column offset, sheet name/number */
3704 	const char *s = NULL;
3705 	int r0 = 0, c0 = 0;
3706 
3707 	if (cmd->opt & OPT_R) {
3708 	    /* --rowoffset */
3709 	    r0 = get_optval_int(cmd->ci, OPT_R, &err);
3710 	}
3711 
3712 	if (!err && (cmd->opt & OPT_C)) {
3713 	    /* --coloffset */
3714 	    c0 = get_optval_int(cmd->ci, OPT_C, &err);
3715 	}
3716 
3717 	if (!err && (cmd->opt & OPT_S)) {
3718 	    /* --sheet */
3719 	    s = get_optval_string(cmd->ci, OPT_S);
3720 	    if (s == NULL) {
3721 		err = E_DATA;
3722 	    }
3723 	}
3724 
3725 	if (!err) {
3726 	    int slist[4] = {3, 0, c0, r0};
3727 
3728 	    free(cmd->list);
3729 	    cmd->list = gretl_list_copy(slist);
3730 	    if (cmd->list == NULL) {
3731 		err = E_ALLOC;
3732 	    } else {
3733 		/* note: dodgy heuristic here? */
3734 		if (small_positive_int(s)) {
3735 		    /* take the --sheet spec as giving a sheet
3736 		       number (1-based) */
3737 		    cmd->list[1] = atoi(s);
3738 		} else if (s != NULL) {
3739 		    /* take it as giving a sheet name */
3740 		    free(cmd->parm2);
3741 		    cmd->parm2 = gretl_strdup(s);
3742 		    if (cmd->parm2 == NULL) {
3743 			err = E_ALLOC;
3744 		    }
3745 		}
3746 	    }
3747 	}
3748     }
3749 
3750     return err;
3751 }
3752 
/* Resolve the first parameter of "rename" to a series index,
   stored in cmd->auxint. If the parameter is an integer string
   its value must be at least 1 and less than dset->v (E_DATA
   otherwise); if not, it is looked up via current_series_index()
   (E_UNKVAR if that gives a negative result). Returns 0 on
   success.
*/

static int post_process_rename_param (CMD *cmd,
				      DATASET *dset)
{
    if (!integer_string(cmd->param)) {
	/* should be the name of a series */
	cmd->auxint = current_series_index(dset, cmd->param);
	return (cmd->auxint < 0)? E_UNKVAR : 0;
    }

    /* numeric ID: check that it's within bounds */
    cmd->auxint = atoi(cmd->param);
    if (cmd->auxint < 1 || cmd->auxint >= dset->v) {
	return E_DATA;
    }

    return 0;
}
3772 
/* Convert the deprecated "sprintf" command into an equivalent
   assignment using the sprintf() function. The replacement
   statement is written into @line, and @cmd is revised to be a
   GENR command of string type with vstart pointing to @line.
   Always returns 0.

   NOTE(review): nothing here checks the capacity of @line --
   confirm that the caller's buffer is big enough for the
   rewritten statement.
*/

static int post_process_sprintf_command (CMD *cmd,
					 char *line)
{
    set_deprecation("sprintf", "sprintf()", 1);

    *line = '\0';

    if (cmd->vstart == NULL) {
	/* format only, no arguments */
	sprintf(line, "%s=sprintf(\"%s\")", cmd->param, cmd->parm2);
    } else {
	/* compose the statement off to the side, then copy it in
	   (presumably because vstart may point into @line -- TODO
	   confirm)
	*/
	gchar *genline = g_strdup_printf("%s=sprintf(\"%s\",%s)", cmd->param,
					 cmd->parm2, cmd->vstart);

	strcpy(line, genline);
	g_free(genline);
    }

    cmd->ci = GENR;
    cmd->gtype = GRETL_TYPE_STRING;
    cmd->vstart = line;

    return 0;
}
3798 
3799 /* check the commands that have the CI_INFL flag:
3800    the precise line-up of required arguments may
3801    depend on the option(s) specified
3802 */
3803 
handle_option_inflections(CMD * cmd)3804 static void handle_option_inflections (CMD *cmd)
3805 {
3806     if (cmd->ci == BXPLOT) {
3807 	if (cmd->opt & OPT_Z) {
3808 	    /* factorized: two variables wanted */
3809 	    cmd->ciflags |= CI_LLEN2;
3810 	} else if (cmd->opt & OPT_X) {
3811 	    /* with --matrix, default to all columns */
3812 	    cmd->ciflags |= CI_DOALL;
3813 	}
3814     } else if (cmd->ci == SMPL) {
3815 	if (cmd->opt & (OPT_M | OPT_A | OPT_C)) {
3816 	    /* no-missing, no-all-missing or contiguous */
3817 	    cmd->ciflags = CI_LIST | CI_DOALL;
3818 	} else if (cmd->opt & OPT_R) {
3819 	    /* restrict */
3820 	    cmd->ciflags = CI_ADHOC;
3821 	} else if (cmd->opt & OPT_F) {
3822 	    /* full: no args */
3823 	    cmd->ciflags = 0;
3824 	} else if (cmd->opt & OPT_O) {
3825 	    /* using dummy variable */
3826 	    cmd->ciflags &= ~CI_PARM2;
3827 	} else if (cmd->opt & OPT_N) {
3828 	    /* random sample */
3829 	    cmd->ciflags = CI_PARM1;
3830 	}
3831     } else if (cmd->ci == SET) {
3832 	if (cmd->opt & (OPT_F | OPT_T)) {
3833 	    /* from file, to file */
3834 	    cmd->ciflags = 0;
3835 	}
3836     } else if (cmd->ci == OUTFILE) {
3837 	if (cmd->opt & OPT_C) {
3838 	    cmd->ciflags = 0;
3839 	}
3840     } else if (cmd->ci == GNUPLOT) {
3841 	if (cmd->opt & OPT_I) {
3842 	    /* we got the input=... option, so no args wanted */
3843 	    cmd->ciflags = 0;
3844 	} else if (cmd->opt & OPT_X) {
3845 	    /* with --matrix, default to all columns */
3846 	    cmd->ciflags |= CI_DOALL;
3847 	}
3848     } else if (cmd->ci == OPEN) {
3849 	if (cmd->opt & OPT_O) {
3850 	    /* --odbc */
3851 	    cmd->ciflags = CI_ADHOC;
3852 	}
3853     } else if (cmd->ci == DELEET) {
3854 	if (cmd->opt == OPT_T) {
3855 	    /* --type=... */
3856 	    cmd->ciflags = 0;
3857 	} else if (cmd->opt == OPT_D) {
3858 	    /* --db */
3859 	    cmd->ciflags = CI_ADHOC;
3860 	}
3861     } else if (cmd->ci == PRINT) {
3862 	if (cmd->opt == OPT_L) {
3863 	    /* --list */
3864 	    cmd->ciflags = CI_PARM1;
3865 	}
3866     } else if (cmd->ci == MODELTAB) {
3867 	if (cmd->opt == OPT_O) {
3868 	    /* --output: no arg needed */
3869 	    cmd->ciflags &= ~CI_PARM1;
3870 	}
3871     } else if (cmd->ci == XTAB) {
3872 	if (cmd->opt & OPT_X) {
3873 	    /* --matrix: no list wanted */
3874 	    cmd->ciflags &= ~CI_LIST;
3875 	}
3876     } else if (cmd->ci == SETINFO) {
3877 	if (cmd->opt & (OPT_M | OPT_C | OPT_D)) {
3878 	    /* midas, continuous or discrete */
3879 	    if (!(cmd->opt & (OPT_G | OPT_I))) {
3880 		/* but not graph-name or description */
3881 		cmd->ciflags &= ~CI_LLEN1;
3882 	    }
3883 	}
3884     }
3885 }
3886 
/* Given the tokens attached to @cmd by tokenize_line(), build
   the finished command: check the options, then collect in turn
   the parameters, list, order and/or trailing expression that
   the flags in cmd->ciflags call for, and finally check that no
   tokens are left unaccounted for.

   @dset: dataset, passed on to the parameter and list handlers.
   @s: execution state, or NULL; if non-NULL its printer is used
   for any message from set_command_vstart().
   @line: command line buffer, or NULL; if non-NULL a "sprintf"
   command is rewritten into it as an assignment statement.

   Returns cmd->err, which is 0 on success.
*/

static int assemble_command (CMD *cmd, DATASET *dset,
			     ExecState *s, char *line)
{
    /* defer handling option(s) till param is known? */
    int options_later = cmd->ci == SETOPT;
    PRN *prn = NULL;

    if (cmd->ntoks == 0) {
	/* nothing to assemble */
	return cmd->err;
    }

    if (s != NULL) {
	prn = s->prn;
    }

#if CDEBUG > 1
    fprintf(stderr, "doing assemble_command...\n");
#endif

    if (!never_takes_options(cmd) && !options_later) {
	cmd->err = check_command_options(cmd);
    }

    if (!cmd->err) {
	handle_command_preamble(cmd);
    }

    if (cmd->err) {
	goto bailout;
    }

    if (matrix_data_option(cmd->ci, cmd->opt)) {
	/* using matrix argument, plain ints ok in list */
	cmd->ciflags |= CI_L1INT;
    }

    if (cmd->ci == PRINT && !(cmd->opt & OPT_L)) {
	if (first_arg_quoted(cmd)) {
	    /* printing a string literal */
	    cmd->ciflags = CI_PARM1;
	} else {
	    /* assume for now that we're printing series */
	    cmd->ciflags |= (CI_LIST | CI_DOALL);
	}
    } else if (option_inflected(cmd)) {
	handle_option_inflections(cmd);
    }

    /* legacy stuff: this test used to compare against TABPRINT
       twice over, leaving EQNPRINT out of account
    */

    if (cmd->ci == TABPRINT || cmd->ci == EQNPRINT) {
	legacy_get_filename(cmd);
    } else if (cmd->ci == SETINFO) {
	get_quoted_dash_fields(cmd, "dn");
    }

    if (cmd->err) {
	goto bailout;
    }

    /* main command assembly begins: the order of the steps
       below matters, since each one picks up where the previous
       one left off among the tokens
    */

    if (cmd->ciflags & CI_PARM1) {
	get_param(cmd, dset);
    } else if (cmd->ciflags & CI_ORD1) {
	get_command_order(cmd);
    }

    if (!cmd->err && cmd->ci == VECM) {
	get_vecm_rank(cmd);
    }

    if (!cmd->err && (cmd->ciflags & CI_EXTRA)) {
	handle_command_extra(cmd);
    }

    if (!cmd->err && options_later) {
	/* the deferred options check */
	cmd->err = check_command_options(cmd);
    }

    if (!cmd->err && (cmd->ciflags & CI_PARM2)) {
	get_parm2(cmd, options_later);
    }

    if (!cmd->err && (cmd->ciflags & CI_LIST)) {
	if (count_remaining_toks(cmd) > 0) {
	    process_command_list(cmd, dset);
	}
    }

    if (!cmd->err && (cmd->ciflags & CI_ORD2)) {
	get_optional_order(cmd);
    }

    if (!cmd->err && (cmd->ciflags & CI_LIST)) {
	check_for_list(cmd);
    }

    if (!cmd->err) {
	if (cmd->ciflags & CI_ADHOC) {
	    handle_adhoc_string(cmd);
	} else if (cmd->ciflags & (CI_EXPR | CI_VARGS)) {
	    cmd->err = set_command_vstart(cmd, s, prn);
	}
    }

    if (!cmd->err && cmd->ci == END) {
	check_end_command(cmd);
    }

    if (!cmd->err) {
	check_for_stray_tokens(cmd);
    }

 bailout:

#if CDEBUG
    print_tokens(cmd);
#endif

    /* post-processing, for certain commands only */

    if (!cmd->err) {
	if (cmd->opt != OPT_NONE &&
	    (cmd->ci == OPEN || cmd->ci == APPEND)) {
	    cmd->err = post_process_spreadsheet_options(cmd);
	} else if (cmd->ci == RENAME) {
	    cmd->err = post_process_rename_param(cmd, dset);
	} else if (cmd->ci == SPRINTF && line != NULL) {
	    cmd->err = post_process_sprintf_command(cmd, line);
	}
    }

    return cmd->err;
}
4020 
/* Run check_for_shadowed_commands() on the first call only;
   subsequent calls do nothing.
*/

static void maybe_init_shadow (void)
{
    static int done;

    if (done) {
	return;
    }

    check_for_shadowed_commands();
    done = 1;
}
4030 
/* Keeper of a private copy of the command line on which the
   parser last failed. If @set is non-zero the stored copy is
   freed and replaced by the result of gretl_strdup() on @s
   (callers pass NULL to clear it; presumably gretl_strdup
   gives NULL in that case -- TODO confirm). In all cases the
   currently stored pointer is returned.
*/

static char *get_or_set_errline (const char *s, int set)
{
    static char *saved;

    if (!set) {
	/* just a query */
	return saved;
    }

    free(saved);
    saved = gretl_strdup(s);

    return saved;
}
4042 
get_parser_errline(void)4043 const char *get_parser_errline (void)
4044 {
4045     return get_or_set_errline(NULL, 0);
4046 }
4047 
4048 /* When the current "if-state" is FALSE, and we're scanning
4049    the current command/statement, it should be sufficient to
4050    determine if we have a command which potentially modifies
4051    the if-state.
4052 */
4053 
get_flow_control_ci(ExecState * state)4054 static int get_flow_control_ci (ExecState *state)
4055 {
4056     char word[6], *s = state->line;
4057     int ci = 0;
4058 
4059     if (sscanf(s, "%5s", word) == 1) {
4060 	int n = 0;
4061 
4062 	if (!strcmp(word, "if")) {
4063 	    ci = IF;
4064 	    n = 2;
4065 	} else if (!strcmp(word, "else")) {
4066 	    ci = ELSE;
4067 	    n = 4;
4068 	} else if (!strcmp(word, "elif")) {
4069 	    ci = ELIF;
4070 	    n = 4;
4071 	} else if (!strcmp(word, "endif")) {
4072 	    ci = ENDIF;
4073 	    n = 5;
4074 	}
4075 	if (ci > 0 && s[n] != '\0' && !isspace(s[n])) {
4076 	    ci = 0;
4077 	}
4078 	if (s[n] != '\0' && (ci == IF || ci == ELIF)) {
4079 	    /* set pointer to the condition */
4080 	    state->cmd->vstart = s + n + 1;
4081 	}
4082     }
4083 
4084     return ci;
4085 }
4086 
/* Common back-end for parsing the command line in @s. The line
   is tokenized (or, if we're not in @compmode and the current
   if-state is false, just inspected for a flow-control keyword),
   the flow-control state is consulted and possibly updated, and
   the command is then assembled unless it is masked or is a
   simple flow-control word.

   @compmode: non-zero if we only want the command index. In
   that case further processing is limited to assembling a LOOP
   command (when @compmode is LOOP) or setting vstart for IF or
   ELIF.
   @ptr: passed through to flow_control().

   The failing line, if any, is recorded for retrieval via
   get_parser_errline(). Returns 0 on success, non-zero error
   code on failure.
*/

static int real_parse_command (ExecState *s,
			       DATASET *dset,
			       int compmode,
			       void *ptr)
{
    CMD *cmd = s->cmd;
    char *line = s->line;
    int err = 0;

#if CDEBUG
    fprintf(stderr, "real_parse: '%s', compmode = %d\n", line, compmode);
#endif

    maybe_init_shadow();

    if (*line == '\0') {
	/* empty line: nothing to parse */
	goto finish;
    }

    if (compmode || !gretl_if_state_false()) {
	/* compiling, or not blocked */
	err = tokenize_line(s, dset, compmode);
    } else {
	/* blocked: take a short-cut */
	cmd->ci = get_flow_control_ci(s);
    }

    if (!err && simple_flow_control(cmd)) {
	/* These don't go to assemble_command(), so check
	   them here for extraneous junk.
	*/
	err = check_for_stray_tokens(cmd);
    }

    if (err) {
	goto finish;
    }

    if (compmode) {
	/* We're doing get_command_index(), for compilation, so we
	   shouldn't do any further processing unless we got a
	   nested loop command (in which case we want to extract
	   the options), or we got a ci that ought to be unitary.
	*/
	if (compmode == LOOP && cmd->ci == LOOP) {
	    err = assemble_command(cmd, dset, s, line);
	    compmode = 0;
	} else if (cmd->ci == IF || cmd->ci == ELIF) {
	    err = set_command_vstart(cmd, s, s->prn);
	}
#if SEMIC_TEST
	else if (cmd->ci == GENR) {
	    set_command_vstart(cmd, s, s->prn);
	}
#endif
	goto finish;
    }

    /* flow_control() needs cmd->vstart to be in place: it will
       hold the condition attached to IF or ELIF
    */
    if ((cmd->ci == IF || cmd->ci == ELIF) && cmd->vstart == NULL) {
	err = set_command_vstart(cmd, s, s->prn);
    }

    /* In the absence of error we now consult and perhaps modify
       the flow control state -- and if we're blocked, return.
    */
    if (!err && flow_control(s, dset, ptr)) {
	if (!cmd->err) {
	    cmd->ci = CMD_MASKED;
	} else {
	    /* we hit an error evaluating the if state */
	    err = cmd->err;
	}
	goto finish;
    }

    /* Otherwise go ahead and "assemble" the parsed command */
    if (!err && !simple_flow_control(cmd)) {
	err = assemble_command(cmd, dset, s, line);
    }

 finish:

#if CDEBUG
    if (cmd->ci == CMD_MASKED) {
	fprintf(stderr, "breaking on flow control, current state = %s\n\n",
		gretl_if_state_false() ? "false" : "true");
    } else if (compmode) {
	fputc('\n', stderr);
    }
    if (err) {
	fprintf(stderr, "+++ tokenizer: err=%d on '%s'\n", err, line);
    }
#endif

    /* record the offending line, or clear the record */
    get_or_set_errline(err ? line : NULL, 1);

    return err;
}
4192 
4193 /* Here we're parsing a command line that was assembled via the gretl
4194    GUI (menus and dialogs). We can take some shortcuts in this case,
4195    since we don't have to worry about filtering comments, carrying out
4196    string substitution, or "if-state" conditionality.
4197 */
4198 
parse_gui_command(char * line,CMD * cmd,DATASET * dset)4199 int parse_gui_command (char *line, CMD *cmd, DATASET *dset)
4200 {
4201     ExecState s = {0};
4202     int err = 0;
4203 
4204     maybe_init_shadow();
4205 
4206     s.line = line;
4207     s.cmd = cmd;
4208 
4209     gretl_cmd_clear(cmd);
4210     gretl_error_clear();
4211 
4212     if (*line != '\0') {
4213 	err = tokenize_line(&s, dset, 0);
4214 	if (!err) {
4215 	    err = assemble_command(cmd, dset, NULL, NULL);
4216 	}
4217     }
4218 
4219     if (err) {
4220 	fprintf(stderr, "+++ parse_gui_command: err=%d on '%s'\n",
4221 		err, line);
4222     }
4223 
4224     return err;
4225 }
4226