1 /*
2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 /* lexer module for 'genr' and related commands */
21 
22 #include "genparse.h"
23 #include "gretl_func.h"
24 #include "uservar.h"
25 #include "gretl_string_table.h"
26 #include "gretl_normal.h"
27 #include "gretl_bundle.h"
28 #include "uservar_priv.h"
29 
30 #define NUMLEN 32
31 #define MAXQUOTE 64
32 
33 #if GENDEBUG
34 # define LDEBUG 1
35 #else
36 # define LDEBUG 0
37 #endif
38 
39 static int parser_next_char (parser *p);
40 
41 #define defining_list(p) (p->flags & P_LISTDEF)
42 
43 #define bare_data_type(s) (s > PUNCT_MAX && s < DTYPE_MAX)
44 
45 #define closing_sym(s) (s == G_RPR || s == G_RBR || s == G_RCB)
46 
47 const char *wordchars = "abcdefghijklmnopqrstuvwxyz"
48                         "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
49                         "0123456789_";
50 
51 struct str_table {
52     int id;
53     const char *str;
54 };
55 
56 struct str_table_ex {
57     int id;
58     const char *str;
59     void *ptr;
60 };
61 
62 struct str_table consts[] = {
63     { CONST_PI,       "$pi" },
64     { CONST_NA,       "NA" },
65     { CONST_INF,      "$inf" },
66     { CONST_NAN,      "$nan" },
67     { CONST_WIN32,    "WIN32" },
68     { CONST_EPS,      "$macheps" },
69     { CONST_HAVE_MPI, "$havempi" },
70     { CONST_MPI_RANK, "$mpirank" },
71     { CONST_MPI_SIZE, "$mpisize" },
72     { CONST_N_PROC,   "$nproc" },
73     { CONST_TRUE,     "TRUE" },
74     { CONST_FALSE,    "FALSE" },
75     { 0,        NULL }
76 };
77 
78 struct str_table dummies[] = {
79     { DUM_NULL,    "null" },
80     { DUM_DIAG,    "diag" },
81     { DUM_UPPER,   "upper" },
82     { DUM_LOWER,   "lower" },
83     { DUM_REAL,    "real" },
84     { DUM_IMAG,    "imag" },
85     { DUM_END,     "end" },
86     { DUM_DATASET, "dataset" },
87     { 0,        NULL }
88 };
89 
90 /* Identify matrix-selection dummy constants:
91    these can be valid only between '[' and ']'.
92 */
93 #define MSEL_DUM(d) (d >= DUM_DIAG && d <= DUM_END)
94 
95 /* dvars: dataset- and test-related accessors */
96 
97 struct str_table dvars[] = {
98     { R_NOBS,      "$nobs" },
99     { R_NVARS,     "$nvars" },
100     { R_PD,        "$pd" },
101     { R_T1,        "$t1" },
102     { R_T2,        "$t2" },
103     { R_TMAX,      "$tmax" },
104     { R_DATATYPE,  "$datatype" },
105     { R_TEST_STAT, "$test" },
106     { R_TEST_PVAL, "$pvalue" },
107     { R_TEST_BRK,  "$qlrbreak" },
108     { R_TEST_LNL,  "$rlnl" },
109     { R_STOPWATCH, "$stopwatch" },
110     { R_PUNIT,     "$unit" },
111     { R_OBSMAJ,    "$obsmajor" },
112     { R_OBSMIN,    "$obsminor" },
113     { R_OBSMIC,    "$obsmicro" },
114     { R_DATES,     "$obsdate" },
115     { R_WINDOWS,   "$windows" },
116     { R_VERSION,   "$version" },
117     { R_ERRNO,     "$error" },
118     { R_SEED,      "$seed" },
119     { R_HUGE,      "$huge" },
120     { R_NOW,       "$now" },
121     { R_RESULT,    "$result" },
122     { R_PNGFONT,   "$pngfont" },
123     { R_MAPFILE,   "$mapfile" },
124     { R_MAP,       "$map" },
125     { R_INDEX,     "obs" },
126     { R_LOGLEVEL,  "$loglevel" },
127     { R_LOGSTAMP,  "$logstamp" },
128     { 0,           NULL },
129 };
130 
131 /* mvars: model-related accessors */
132 
133 struct str_table mvars[] = {
134     { M_ESS,     "$ess" },
135     { M_T,       "$T" },
136     { M_RSQ,     "$rsq" },
137     { M_SIGMA,   "$sigma" },
138     { M_DF,      "$df" },
139     { M_NCOEFF,  "$ncoeff" },
140     { M_LNL,     "$lnl" },
141     { M_GMMCRIT, "$gmmcrit" },
142     { M_AIC,     "$aic" },
143     { M_BIC,     "$bic" },
144     { M_HQC,     "$hqc" },
145     { M_TRSQ,    "$trsq" },
146     { M_DW,      "$dw" },
147     { M_DWPVAL,  "$dwpval" },
148     { M_FSTT,    "$Fstat" },
149     { M_CHISQ,   "$chisq" },
150     { M_DIAGTEST, "$diagtest" },
151     { M_DIAGPVAL, "$diagpval" },
152     { M_PMANTEAU, "$pmanteau" },
153     { M_UHAT,    "$uhat" },
154     { M_YHAT,    "$yhat" },
155     { M_LLT,     "$llt" },
156     { M_AHAT,    "$ahat" },
157     { M_SAMPLE,  "$sample" },
158     { M_H,       "$h" },
159     { M_COEFF,   "$coeff" },
160     { M_SE,      "$stderr" },
161     { M_VCV,     "$vcv" },
162     { M_RHO,     "$rho" },
163     { M_COMPAN,  "$compan" },
164     { M_XTXINV,  "$xtxinv" },
165     { M_VECG,    "$vecGamma" },
166     { M_VMA,     "$vma" },
167     { M_FEVD,    "$fevd" },
168     { M_EVALS,   "$evals" },
169     { M_JALPHA,  "$jalpha" },
170     { M_JBETA,   "$jbeta" },
171     { M_JVBETA,  "$jvbeta" },
172     { M_JS00,    "$s00" },
173     { M_JS11,    "$s11" },
174     { M_JS01,    "$s01" },
175     { M_EC,      "$ec" },
176     { M_HAUSMAN, "$hausman" },
177     { M_SARGAN,  "$sargan" },
178     { M_SYSGAM,  "$sysGamma" },
179     { M_SYSA,    "$sysA" },
180     { M_SYSB,    "$sysB" },
181     { M_FCAST,   "$fcast" },
182     { M_FCSE,    "$fcse" },
183     { M_COEFF_CI,"$coeff_ci" },
184     { M_EHAT,    "$ehat" },
185     { M_MNLPROBS, "$mnlprobs" },
186     { M_XLIST,   "$xlist" },
187     { M_YLIST,   "$ylist" },
188     { M_COMMAND, "$command" },
189     { M_DEPVAR,  "$depvar" },
190     { M_PARNAMES, "$parnames" },
191     { 0,         NULL }
192 };
193 
194 /* bvars: bundle accessors */
195 
196 struct str_table bvars[] = {
197     { B_MODEL,   "$model" },
198     { B_SYSTEM,  "$system" },
199     { B_SYSINFO, "$sysinfo" },
200     { 0,         NULL }
201 };
202 
203 /* Below, @ptrfuncs: table of functions for which we wish to
204    attach function-pointers to the relevant NODE. Nota bene:
205    it's crucial that no function in @ptrfuncs is also listed
206    in @funcs below!
207 
208    The order of function symbols in @ptrfuncs need not match
209    the order in which they're listed in genparse.h, but it's
210    crucial that every function with ID number <= FP_MAX has
211    an entry in @ptrfuncs.
212 
213    "Crucial" -> certain crash on calling wrongly classified
214    function!
215 */
216 
217 struct str_table_ex ptrfuncs[] = {
218     { F_ABS,   "abs",   fabs },
219     { F_CEIL,  "ceil",  ceil },
220     { F_FLOOR, "floor", floor },
221     { F_SIN,   "sin",   sin },
222     { F_COS,   "cos",   cos },
223     { F_TAN,   "tan",   tan },
224     { F_ASIN,  "asin",  asin },
225     { F_ACOS,  "acos",  acos },
226     { F_ATAN,  "atan",  atan },
227     { F_SINH,  "sinh",  sinh },
228     { F_COSH,  "cosh",  cosh },
229     { F_TANH,  "tanh",  tanh },
230     { F_ASINH, "asinh", asinh },
231     { F_ACOSH, "acosh", acosh },
232     { F_ATANH, "atanh", atanh },
233     { F_LOG,   "log",   log },
234     { F_LOG10, "log10", log10 },
235     { F_LOG2,  "log2",  log2 },
236     { F_EXP,   "exp",   exp },
237     { F_SQRT,  "sqrt",  sqrt },
238     { F_GAMMA,    "gammafun", gammafun },
239     { F_LNGAMMA,  "lngamma",  lngamma },
240     { F_DIGAMMA,  "digamma",  digamma },
241     { F_TRIGAMMA, "trigamma", trigamma },
242     { F_INVMILLS, "invmills", invmills },
243     { F_ROUND,    "round",    gretl_round },
244     { F_SGN,      "sgn",      gretl_sgn },
245     { F_CNORM, "cnorm", normal_cdf },
246     { F_DNORM, "dnorm", normal_pdf },
247     { F_QNORM, "qnorm", normal_cdf_inverse },
248     { F_CARG,  "carg",  carg },
249     { F_CMOD,  "cmod",  cabs },
250     { F_REAL,  "Re",    creal },
251     { F_IMAG,  "Im",    cimag },
252     { F_LOGISTIC, "logistic", logistic_cdf },
253     { 0, NULL, NULL }
254 };
255 
256 struct str_table funcs[] = {
257     { F_ATAN2,    "atan2" },
258     { F_DIFF,     "diff" },
259     { F_LDIFF,    "ldiff" },
260     { F_SDIFF,    "sdiff" },
261     { F_LLAG,     "lags" },
262     { F_HFLAG,    "hflags" },
263     { F_DROPCOLL, "dropcoll" },
264     { F_TOINT,    "int" },
265     { F_SORT,     "sort" },
266     { F_DSORT,    "dsort" },
267     { F_SORTBY,   "sortby" },
268     { F_RANKING,  "ranking" },
269     { F_ODEV,     "orthdev" },
270     { F_NOBS,     "nobs" },
271     { F_T1,       "firstobs" },
272     { F_T2,       "lastobs" },
273     { F_RUNIFORM, "uniform" },
274     { F_RNORMAL,  "normal" },
275     { F_CUM,      "cum" },
276     { F_MISSING,  "missing" },
277     { F_DATAOK,   "ok" },        /* opposite of missing */
278     { F_MISSZERO, "misszero" },
279     { F_LRVAR,    "lrvar" },
280     { F_LRCOVAR,  "lrcovar" },
281     { F_FEVD,     "fevd" },
282     { F_QUANTILE, "quantile" },
283     { F_MEDIAN,   "median" },
284     { F_GINI,     "gini" },
285     { F_ZEROMISS, "zeromiss" },
286     { F_SUM,      "sum" },
287     { F_SUMALL,   "sumall" },
288     { F_MEAN,     "mean" },
289     { F_MIN,      "min" },
290     { F_MAX,      "max" },
291     { F_SD,       "sd" },
292     { F_VCE,      "var" },
293     { F_SKEWNESS, "skewness" },
294     { F_KURTOSIS, "kurtosis" },
295     { F_SST,      "sst" },
296     { F_RESAMPLE, "resample" },
297     { F_PNOBS,    "pnobs" },     /* per-unit nobs in panels */
298     { F_PMIN,     "pmin" },      /* panel min */
299     { F_PMAX,     "pmax" },      /* panel max */
300     { F_PSUM,     "psum" },      /* panel sum */
301     { F_PMEAN,    "pmean" },     /* panel mean */
302     { F_PXSUM,    "pxsum" },     /* panel x-sectional sum */
303     { F_PXNOBS,   "pxnobs" },    /* panel x-sectional obs count */
304     { F_PSD,      "psd" },       /* panel std dev */
305     { F_PSHRINK,  "pshrink" },
306     { F_PEXPAND,  "pexpand" },
307     { F_HPFILT,   "hpfilt" },    /* Hodrick-Prescott filter */
308     { F_BKFILT,   "bkfilt" },    /* Baxter-King filter */
309     { F_BWFILT,   "bwfilt" },    /* Butterworth filter */
310     { F_FRACDIFF, "fracdiff" },  /* fractional difference */
311     { F_BOXCOX,   "boxcox" },    /* Box-Cox transformation */
312     { F_COV,      "cov" },
313     { F_COR,      "corr" },
314     { F_MOVAVG,   "movavg" },
315     { F_IMAT,     "I" },
316     { F_ZEROS,    "zeros" },
317     { F_ONES,     "ones" },
318     { F_SEQ,      "seq" },
319     { F_REPLACE,  "replace" },
320     { F_MUNIF,    "muniform" },
321     { F_MNORM,    "mnormal" },
322     { F_SUMC,     "sumc" },
323     { F_SUMR,     "sumr" },
324     { F_PRODC,    "prodc" },
325     { F_PRODR,    "prodr" },
326     { F_MEANC,    "meanc" },
327     { F_MEANR,    "meanr" },
328     { F_SDC,      "sdc" },
329     { F_MINC,     "minc" },
330     { F_MAXC,     "maxc" },
331     { F_MINR,     "minr" },
332     { F_MAXR,     "maxr" },
333     { F_IMINC,    "iminc" },
334     { F_IMAXC,    "imaxc" },
335     { F_IMINR,    "iminr" },
336     { F_IMAXR,    "imaxr" },
337     { F_FFT,      "fft" },
338     { F_FFT2,     "fft2" },
339     { F_FFTI,     "ffti" },
340     { F_CMULT,    "cmult" },
341     { F_HDPROD,   "hdprod" },
342     { F_CDIV,     "cdiv" },
343     { F_MCOV,     "mcov" },
344     { F_MCORR,    "mcorr" },
345     { F_MXTAB,    "mxtab" },
346     { F_CDEMEAN,  "cdemean" },
347     { F_CHOL,     "cholesky" },
348     { F_PSDROOT,  "psdroot" },
349     { F_INSTRINGS, "instrings" },
350     { F_INV,      "inv" },
351     { F_INVPD,    "invpd" },
352     { F_GINV,     "ginv" },
353     { F_DIAG,     "diag" },
354     { F_TRANSP,   "transp" },
355     { F_CTRANS,   "ctrans" },
356     { F_VEC,      "vec" },
357     { F_VECH,     "vech" },
358     { F_UNVECH,   "unvech" },
359     { F_UPPER,    "upper" },
360     { F_LOWER,    "lower" },
361     { F_ROWS,     "rows" },
362     { F_COLS,     "cols" },
363     { F_DET,      "det" },
364     { F_LDET,     "ldet" },
365     { F_TRACE,    "tr" },
366     { F_NORM1,    "onenorm" },
367     { F_INFNORM,  "infnorm" },
368     { F_RCOND,    "rcond" },
369     { F_RANK,     "rank" },
370     { F_QFORM,    "qform" },
371     { F_MLAG,     "mlag" },
372     { F_QR,       "qrdecomp" },
373     { F_EIGSYM,   "eigensym" },
374     { F_EIGGEN,   "eigengen" }, /* legacy */
375     { F_EIGEN,    "eigen" },
376     { F_SCHUR,    "schur" },
377     { F_EIGSOLVE, "eigsolve" },
378     { F_NULLSPC,  "nullspace" },
379     { F_PRINCOMP, "princomp" },
380     { F_MEXP,     "mexp" },
381     { F_MLOG,     "mlog" },
382     { F_FDJAC,    "fdjac" },
383     { F_BFGSMAX,  "BFGSmax" },
384     { F_BFGSCMAX, "BFGScmax" },
385     { F_NRMAX,    "NRmax" },
386     { F_NUMHESS,  "numhess" },
387     { F_OBSNUM,   "obsnum" },
388     { F_ISDISCR,  "isdiscrete" },
389     { F_ISDUMMY,  "isdummy"},
390     { F_TYPEOF,   "typeof" },
391     { F_EXISTS,   "exists" },
392     { F_NELEM,    "nelem" },
393     { F_PDF,      "pdf" },
394     { F_CDF,      "cdf" },
395     { F_INVCDF,   "invcdf" },
396     { F_PVAL,     "pvalue" },
397     { F_CRIT,     "critical" },
398     { F_RANDGEN,  "randgen" },
399     { F_MRANDGEN, "mrandgen" },
400     { F_RANDGEN1, "randgen1" },
401     { F_URCPVAL,  "urcpval" },
402     { F_QLRPVAL,  "qlrpval" },
403     { F_VALUES,   "values" },
404     { F_UNIQ,     "uniq" },
405     { F_MSHAPE,   "mshape" },
406     { F_SVD,      "svd" },
407     { F_MOLS,     "mols" },
408     { F_MPOLS,    "mpols" },
409     { F_MRLS,     "mrls" },
410     { F_MREAD,    "mread" },
411     { F_MWRITE,   "mwrite" },
412     { F_BREAD,    "bread" },
413     { F_BWRITE,   "bwrite" },
414     { F_MCSEL,    "selifc" },
415     { F_MRSEL,    "selifr" },
416     { F_POLROOTS, "polroots" },
417     { F_DUMIFY,   "dummify" },
418     { F_WMEAN,    "wmean" },
419     { F_WVAR,     "wvar" },
420     { F_WSD,      "wsd" },
421     { F_SQUARE,   "square" },
422     { F_FILTER,   "filter" },
423     { F_KFILTER,  "kfilter" },
424     { F_KSMOOTH,  "ksmooth" },
425     { F_KDSMOOTH, "kdsmooth" },
426     { F_KSIMUL,   "ksimul" },
427     { F_KSIMDATA, "ksimdata" },
428     { F_TRIMR,    "trimr" },
429     { F_GETENV,   "getenv" },
430     { F_NGETENV,  "ngetenv" },
431     { F_ARGNAME,  "argname" },
432     { F_OBSLABEL, "obslabel" },
433     { F_READFILE, "readfile" },
434     { F_BACKTICK, "grab" },
435     { F_STRSTR,   "strstr" },
436     { F_INSTRING, "instring" },
437     { F_STRSTRIP, "strstrip" },
438     { F_STRNCMP,  "strncmp" },
439     { F_STRLEN,   "strlen" },
440     { F_PRINTF,   "printf" },
441     { F_SPRINTF,  "sprintf" },
442     { F_SSCANF,   "sscanf" },
443     { F_VARNAME,  "varname" },
444     { F_VARNAMES, "varnames" },
445     { F_VARNUM,   "varnum" },
446     { F_TOLOWER,  "tolower" },
447     { F_TOUPPER,  "toupper" },
448     { F_CNAMESET, "cnameset" },
449     { F_RNAMESET, "rnameset" },
450     { F_LJUNGBOX, "ljungbox" },
451     { F_MSORTBY,  "msortby" },
452     { F_LINCOMB,  "lincomb" },
453     { F_IMHOF,    "imhof" },
454     { F_TOEPSOLV, "toepsolv" },
455     { F_DSUM,     "diagcat" },
456     { F_XMIN,     "xmin" },
457     { F_XMAX,     "xmax" },
458     { F_CORRGM,   "corrgm" },
459     { F_MCOVG,    "mcovg" },
460     { F_FCSTATS,  "fcstats" },
461     { F_BESSEL,   "bessel" },
462     { F_FRACLAG,  "fraclag" },
463     { F_MREV,     "mreverse" },
464     { F_DESEAS,   "deseas" },
465     { F_TRAMOLIN, "linearize" },
466     { F_PERGM,    "pergm" },
467     { F_IRR,      "irr" },
468     { F_NPV,      "npv" },
469     { F_WEEKDAY,  "weekday" },
470     { F_KDENSITY, "kdensity" },
471     { F_MONTHLEN, "monthlen" },
472     { F_EPOCHDAY, "epochday" },
473     { F_SETNOTE,  "setnote" },
474     { F_POLYFIT,  "polyfit" },
475     { F_CHOWLIN,  "chowlin" },
476     { F_VARSIMUL, "varsimul" },
477     { F_STRSPLIT, "strsplit" },
478     { F_INLIST,   "inlist" },
479     { F_ERRMSG,   "errmsg" },
480     { F_ISCONST,  "isconst" },
481     { F_IRF,      "irf" },
482     { F_INBUNDLE, "inbundle" },
483     { F_STRSUB,   "strsub" },
484     { F_REGSUB,   "regsub" },
485     { F_CNAMEGET, "cnameget" },
486     { F_RNAMEGET, "rnameget" },
487     { F_RANDINT,  "randint" },
488     { F_NADARWAT, "nadarwat" },   /* Nadaraya-Watson */
489     { F_SIMANN,   "simann" },     /* simulated annealing */
490     { F_LOESS,    "loess" },
491     { F_FREQ,     "freq" },
492     { F_GHK,      "ghk" },
493     { F_HALTON,   "halton" },
494     { F_IWISHART, "iwishart" },
495     { F_ISNAN,    "isnan" },
496     { F_TYPESTR,  "typestr" },
497     { F_QUADTAB,  "quadtable" },
498     { F_AGGRBY,   "aggregate" },
499     { F_REMOVE,   "remove" },
500     { F_ISODATE,  "isodate" },
501     { F_ISOWEEK,  "isoweek" },
502     { F_JULDATE,  "juldate" },
503     { F_GETLINE,  "getline" },
504     { F_ATOF,     "atof" },
505     { F_FIXNAME,  "fixname" },
506     { F_ISOCONV,  "isoconv" },
507     { F_SUBSTR,   "substr" },
508     { F_MPI_SEND, "mpisend" },
509     { F_MPI_RECV, "mpirecv" },
510     { F_BCAST,    "mpibcast" },
511     { F_REDUCE,   "mpireduce" },
512     { F_ALLREDUCE, "mpiallred" },
513     { F_SCATTER,   "mpiscatter" },
514     { F_BARRIER,   "mpibarrier" },
515     { F_EASTER,    "easterday" },
516     { F_GENSERIES, "genseries" },
517     { F_CURL,      "curl" },
518     { F_JSONGET,   "jsonget" },
519     { F_JSONGETB,  "jsongetb" },
520     { F_XMLGET,    "xmlget" },
521     { F_NLINES,    "nlines" },
522     { F_KPSSCRIT,  "kpsscrit" },
523     { F_ARRAY,     "array" },
524     { F_STRVALS,   "strvals" },
525     { F_STRINGIFY, "stringify" },
526     { F_BOOTCI,    "bootci" },
527     { F_BOOTPVAL,  "bootpval" },
528     { F_SEASONALS, "seasonals" },
529     { F_DEFARRAY,  "defarray" },
530     { F_DEFBUNDLE, "defbundle" },
531     { F_DEFLIST,   "deflist" },
532     { F_DEFARGS,   "_" },
533     { F_KSETUP,    "ksetup" },
534     { F_MWEIGHTS,  "mweights" },
535     { F_MGRADIENT, "mgradient" },
536     { F_MLINCOMB,  "mlincomb" },
537     { F_MIDASMULT, "midasmult" },
538     { F_HFDIFF,    "hfdiff" },
539     { F_HFLDIFF,   "hfldiff" },
540     { F_HFLIST,    "hflist" },
541     { F_NMMAX,     "NMmax" },
542     { F_GSSMAX,    "GSSmax" },
543     { F_CNUMBER,   "cnumber" },
544     { F_NAALEN,    "naalen" },
545     { F_KMEIER,    "kmeier" },
546     { F_NORMTEST,  "normtest" },
547     { F_ECDF,      "ecdf" },
548     { F_NPCORR,    "npcorr" },
549     { F_DAYSPAN,   "dayspan" },
550     { F_SMPLSPAN,  "smplspan" },
551     { F_SLEEP,     "sleep" },
552     { F_GETINFO,   "getinfo" },
553     { F_CDUMIFY,   "cdummify" },
554     { F_SVM,       "svm" },
555     { F_GETKEYS,   "getkeys" },
556     { F_FEVAL,     "feval" },
557     { F_BRENAME,   "brename" },
558     { F_CCODE,     "isocountry" },
559     { F_LSOLVE,    "Lsolve" },
560     { F_HYP2F1,    "hyp2f1" },
561     { F_STRFTIME,  "strftime" },
562     { F_STRPTIME,  "strptime" },
563     { F_BKW,       "bkw" },
564     { F_FZERO,     "fzero" },
565     { F_CONV2D,    "conv2d" },
566     { F_MSPLITBY,  "msplitby" },
567     { F_FLATTEN,   "flatten" },
568     { F_FUNCERR,   "funcerr" },
569     { F_ERRORIF,   "errorif" },
570     { F_ISCMPLX,   "iscomplex" },
571     { F_COMPLEX,   "complex" },
572     { F_CONJ,      "conj" },
573     { F_CSWITCH,   "cswitch" },
574     { F_RANDPERM,  "randperm" },
575     { F_STDIZE,    "stdize" },
576     { F_STACK,     "stack" },
577     { F_GEOPLOT,   "geoplot" },
578     { F_BINCOEFF,  "bincoeff" },
579     { F_TDISAGG,   "tdisagg" },
580     { F_ASSERT,    "assert" },
581     { F_VMA,       "vma" },
582     { F_BCHECK,    "bcheck" },
583     { F_CONTAINS,  "contains" },
584     { F_LPSOLVE,   "lpsolve" },
585     { 0,           NULL }
586 };
587 
588 struct str_table func_alias[] = {
589     { F_EIGEN,    "eiggen2" },
590     { F_NMMAX,    "NMmin" },
591     { F_NRMAX,    "NRmin" },
592     { F_BFGSMAX,  "BFGSmin" },
593     { F_BFGSCMAX, "BFGScmin" },
594     { F_GSSMAX,   "GSSmin" },
595     { F_GAMMA,    "gammafunc" },
596     { F_GAMMA,    "gamma" },
597     { F_LOG,      "logs" },
598     { F_LOG,      "ln" },
599     { F_SQUARE,   "xpx" },
600     { F_BACKTICK, "$" },
601     { F_CNAMESET, "colnames" },
602     { F_RNAMESET, "rownames" },
603     { F_CNAMEGET, "colname" },
604     { F_RNAMEGET, "rowname" },
605     { F_EXISTS,   "isnull" }, /* deprecated */
606     { 0,          NULL }
607 };
608 
609 struct str_table hidden_funcs[] = {
610     { HF_CLOGFI,   "_clogitfi" },
611     { HF_JBTERMS,  "_jbterms" },
612     { HF_LISTINFO, "_listinfo" },
613     { HF_REGLS,    "_regls" },
614     { HF_SFCGI,    "_sfcgi" },
615     { 0,           NULL }
616 };
617 
const_lookup(const char * s)618 int const_lookup (const char *s)
619 {
620     int i;
621 
622     for (i=0; consts[i].id != 0; i++) {
623 	if (!strcmp(s, consts[i].str)) {
624 	    return consts[i].id;
625 	}
626     }
627 
628     return 0;
629 }
630 
constname(int c)631 const char *constname (int c)
632 {
633     int i;
634 
635     for (i=0; consts[i].id != 0; i++) {
636 	if (c == consts[i].id) {
637 	    return consts[i].str;
638 	}
639     }
640 
641     return "unknown";
642 }
643 
gretl_function_hash_init(void)644 static GHashTable *gretl_function_hash_init (void)
645 {
646     GHashTable *ht;
647     int i;
648 
649     ht = g_hash_table_new(g_str_hash, g_str_equal);
650 
651     for (i=0; ptrfuncs[i].str != NULL; i++) {
652 	g_hash_table_insert(ht, (gpointer) ptrfuncs[i].str, &ptrfuncs[i]);
653     }
654 
655     for (i=0; funcs[i].str != NULL; i++) {
656 	g_hash_table_insert(ht, (gpointer) funcs[i].str, &funcs[i]);
657     }
658 
659     for (i=0; hidden_funcs[i].str != NULL; i++) {
660 	g_hash_table_insert(ht, (gpointer) hidden_funcs[i].str,
661 			    &hidden_funcs[i]);
662     }
663 
664     return ht;
665 }
666 
667 static GHashTable *oht;
668 
install_function_override(const char * funname,const char * pkgname,gpointer data)669 int install_function_override (const char *funname,
670 			       const char *pkgname,
671 			       gpointer data)
672 {
673     if (funname == NULL) {
674 	/* cleanup signal */
675 	if (oht != NULL) {
676 	    g_hash_table_destroy(oht);
677 	    oht = NULL;
678 	}
679 	return 0;
680     }
681 
682     if (oht == NULL) {
683 	oht = g_hash_table_new_full(g_str_hash, g_str_equal,
684 				    g_free, NULL);
685     }
686 
687     if (oht != NULL) {
688 	gchar *key = g_strdup_printf("%s::%s", pkgname, funname);
689 
690 	g_hash_table_insert(oht, (gpointer) key, data);
691     }
692 
693     return 0;
694 }
695 
delete_function_override(const char * funname,const char * pkgname)696 int delete_function_override (const char *funname,
697 			      const char *pkgname)
698 {
699     int ret = 0;
700 
701     if (oht != NULL) {
702 	gchar *key = g_strdup_printf("%s::%s", pkgname, funname);
703 
704 	if (g_hash_table_remove(oht, key)) {
705 	    fprintf(stderr, "'%s': deleted override of built-in\n", key);
706 	    ret = 1;
707 	}
708 	g_free(key);
709     }
710 
711     return ret;
712 }
713 
get_function_override(const char * sf,gpointer p)714 static ufunc *get_function_override (const char *sf,
715 				     gpointer p)
716 {
717     const char *sp = function_package_get_name(p);
718     gchar *key = g_strdup_printf("%s::%s", sp, sf);
719     ufunc *uf = g_hash_table_lookup(oht, key);
720 
721 #if 0
722     if (uf != NULL) {
723 	fprintf(stderr, "'%s': using package override\n", key);
724     }
725 #endif
726     g_free(key);
727 
728     return uf;
729 }
730 
731 /* Attention: this function is called from function_loopkup()
732    below, and in that context @p will be NULL -- so don't
733    dereference @p without checking it for nullity first!
734 */
735 
real_function_lookup(const char * s,int aliases,parser * p)736 static int real_function_lookup (const char *s, int aliases,
737 				 parser *p)
738 {
739     static GHashTable *fht;
740     gpointer fnp;
741 
742     if (s == NULL) {
743 	/* cleanup signal */
744 	if (fht != NULL) {
745 	    g_hash_table_destroy(fht);
746 	    fht = NULL;
747 	}
748 	return 0;
749     }
750 
751     if (fht == NULL) {
752 	fht = gretl_function_hash_init();
753     }
754 
755     fnp = g_hash_table_lookup(fht, s);
756     if (fnp != NULL) {
757 	struct str_table *st = (struct str_table *) fnp;
758 
759 	if (p != NULL && st->id > 0 && st->id < FP_MAX) {
760 	    struct str_table_ex *sx = (struct str_table_ex *) fnp;
761 
762 	    p->data = sx->ptr;
763 	}
764 	return st->id;
765     }
766 
767     if (aliases) {
768 	int i;
769 
770 	for (i=0; func_alias[i].id != 0; i++) {
771 	    if (!strcmp(s, func_alias[i].str)) {
772 #if 0 /* not just yet? */
773 		if (!strcmp(s, "isnull")) {
774 		    gretl_warnmsg_set(_("obsolete function isnull(): "
775 					"please use !exists() instead"));
776 		}
777 #endif
778 		if (p != NULL) {
779 		    p->flags |= P_ALIASED;
780 		}
781 		return func_alias[i].id;
782 	    }
783 	}
784     }
785 
786     return 0;
787 }
788 
gretl_function_hash_cleanup(void)789 void gretl_function_hash_cleanup (void)
790 {
791     real_function_lookup(NULL, 0, NULL);
792     install_function_override(NULL, NULL, NULL);
793 }
794 
function_lookup(const char * s)795 int function_lookup (const char *s)
796 {
797     return real_function_lookup(s, 0, NULL);
798 }
799 
is_function_alias(const char * s)800 int is_function_alias (const char *s)
801 {
802     int i;
803 
804     for (i=0; func_alias[i].id != 0; i++) {
805 	if (!strcmp(s, func_alias[i].str)) {
806 	    return 1;
807 	}
808     }
809 
810     return 0;
811 }
812 
get_genr_function_pointer(int f)813 void *get_genr_function_pointer (int f)
814 {
815     int i;
816 
817     for (i=0; ptrfuncs[i].str != NULL; i++) {
818 	if (ptrfuncs[i].id == f) {
819 	    return ptrfuncs[i].ptr;
820 	}
821     }
822 
823     return NULL;
824 }
825 
function_lookup_with_alias(const char * s,parser * p)826 static int function_lookup_with_alias (const char *s,
827 				       parser *p)
828 {
829     if (oht != NULL) {
830 	/* we have a record of one or more package-private
831 	   functions whose names collide with built-ins
832 	*/
833 	gpointer pp = get_active_function_package(OPT_O);
834 
835 	if (pp != NULL) {
836 	    ufunc *uf = get_function_override(s, pp);
837 
838 	    if (uf != NULL) {
839 		p->idstr = gretl_strdup(s);
840 		p->data = uf;
841 		return UFUN;
842 	    }
843 	}
844     }
845 
846     return real_function_lookup(s, 1, p);
847 }
848 
funname(int t)849 static const char *funname (int t)
850 {
851     int i;
852 
853     for (i=0; ptrfuncs[i].id != 0; i++) {
854 	if (t == ptrfuncs[i].id) {
855 	    return ptrfuncs[i].str;
856 	}
857     }
858 
859     for (i=0; funcs[i].id != 0; i++) {
860 	if (t == funcs[i].id) {
861 	    return funcs[i].str;
862 	}
863     }
864 
865     for (i=0; hidden_funcs[i].id != 0; i++) {
866 	if (t == hidden_funcs[i].id) {
867 	    return hidden_funcs[i].str;
868 	}
869     }
870 
871     return "unknown";
872 }
873 
show_alias(int i)874 static int show_alias (int i)
875 {
876     if (strstr(func_alias[i].str, "min")) {
877 	return 1;
878     } else {
879 	return 0;
880     }
881 }
882 
883 /* return the number of built-in functions */
884 
gen_func_count(void)885 int gen_func_count (void)
886 {
887     int i, n = 0;
888 
889     for (i=0; ptrfuncs[i].id != 0; i++) {
890 	n++;
891     }
892 
893     for (i=0; funcs[i].id != 0; i++) {
894 	n++;
895     }
896 
897     for (i=0; func_alias[i].id != 0; i++) {
898 	if (show_alias(i)) {
899 	    n++;
900 	}
901     }
902 
903     return n;
904 }
905 
906 /* return the name of function @i, including aliases */
907 
gen_func_name(int i)908 const char *gen_func_name (int i)
909 {
910     int j, seq = -1;
911 
912     for (j=0; ptrfuncs[j].id != 0; j++) {
913 	seq++;
914 	if (seq == i) {
915 	    return ptrfuncs[i].str;
916 	}
917     }
918 
919     for (j=0; funcs[j].id != 0; j++) {
920 	seq++;
921 	if (seq == i) {
922 	    return funcs[j].str;
923 	}
924     }
925 
926     for (j=0; func_alias[j].id != 0; j++) {
927 	if (show_alias(j)) {
928 	    seq++;
929 	}
930 	if (seq == i) {
931 	    return func_alias[j].str;
932 	}
933     }
934 
935     return NULL;
936 }
937 
model_var_count(void)938 int model_var_count (void)
939 {
940     int i;
941 
942     for (i=0; mvars[i].id != 0; i++) ;
943     return i;
944 }
945 
model_var_name(int i)946 const char *model_var_name (int i)
947 {
948     return mvars[i].str;
949 }
950 
bundle_var_count(void)951 int bundle_var_count (void)
952 {
953     int i;
954 
955     for (i=0; bvars[i].id != 0; i++) ;
956     return i;
957 }
958 
bundle_var_name(int i)959 const char *bundle_var_name (int i)
960 {
961     return bvars[i].str;
962 }
963 
data_var_count(void)964 int data_var_count (void)
965 {
966     int i, n = 0;
967 
968     for (i=0; dvars[i].id != 0; i++) {
969 	if (dvars[i].str[0] == '$') {
970 	    n++;
971 	}
972     }
973 
974     return n;
975 }
976 
data_var_name(int i)977 const char *data_var_name (int i)
978 {
979     return dvars[i].str;
980 }
981 
gretl_function_complete(const char * s)982 const char *gretl_function_complete (const char *s)
983 {
984     size_t n = strlen(s);
985     int i;
986 
987     for (i=0; ptrfuncs[i].str != NULL; i++) {
988 	if (!strncmp(s, ptrfuncs[i].str, n)) {
989 	    return ptrfuncs[i].str;
990 	}
991     }
992 
993     for (i=0; funcs[i].str != NULL; i++) {
994 	if (!strncmp(s, funcs[i].str, n)) {
995 	    return funcs[i].str;
996 	}
997     }
998 
999     return NULL;
1000 }
1001 
gretl_const_count(void)1002 int gretl_const_count (void)
1003 {
1004     int i;
1005 
1006     for (i=0; consts[i].id != 0; i++) ;
1007     return i;
1008 }
1009 
gretl_const_name(int i)1010 const char *gretl_const_name (int i)
1011 {
1012     return consts[i].str;
1013 }
1014 
1015 /* end external stuff */
1016 
1017 /* cases where 'end' can indicate 'last element of' */
1018 #define DUM_END_OK(t) (t==MAT || t==ARRAY || t==MVAR || t==STR || t == LIST)
1019 
dummy_lookup(const char * s,parser * p)1020 static int dummy_lookup (const char *s, parser *p)
1021 {
1022     int i, d = 0;
1023 
1024     for (i=0; dummies[i].id != 0; i++) {
1025 	if (!strcmp(s, dummies[i].str)) {
1026 	    d = dummies[i].id;
1027 	    break;
1028 	}
1029     }
1030 
1031     if (MSEL_DUM(d) && parser_next_char(p) != ']') {
1032 	if (d == DUM_END && DUM_END_OK(p->upsym)) {
1033 	    ; /* OK? */
1034 	} else if (d == DUM_END) {
1035 	    fprintf(stderr, "DUM_END: not interpreted for upsym %s\n",
1036 		    getsymb(p->upsym));
1037 	    d = 0;
1038 	} else {
1039 	    d = 0;
1040 	}
1041     }
1042 
1043     return d;
1044 }
1045 
dumname(int t)1046 const char *dumname (int t)
1047 {
1048     int i;
1049 
1050     for (i=0; dummies[i].id != 0; i++) {
1051 	if (t == dummies[i].id) {
1052 	    return dummies[i].str;
1053 	}
1054     }
1055 
1056     return "unknown";
1057 }
1058 
dvar_lookup(const char * s)1059 static int dvar_lookup (const char *s)
1060 {
1061     int i;
1062 
1063     for (i=0; dvars[i].id != 0; i++) {
1064 	if (!strcmp(s, dvars[i].str)) {
1065 	    return dvars[i].id;
1066 	}
1067     }
1068 
1069     return 0;
1070 }
1071 
dvarname(int t)1072 const char *dvarname (int t)
1073 {
1074     int i;
1075 
1076     for (i=0; dvars[i].id != 0; i++) {
1077 	if (t == dvars[i].id) {
1078 	    return dvars[i].str;
1079 	}
1080     }
1081 
1082     return "unknown";
1083 }
1084 
mvar_lookup(const char * s)1085 int mvar_lookup (const char *s)
1086 {
1087     int i;
1088 
1089     for (i=0; mvars[i].id != 0; i++) {
1090 	if (!strcmp(s, mvars[i].str)) {
1091 	    return mvars[i].id;
1092 	}
1093     }
1094 
1095     /* aliases */
1096 
1097     if (!strcmp(s, "$nrsq")) {
1098 	return M_TRSQ;
1099     } else if (!strcmp(s, "$fcerr")) {
1100 	return M_FCSE;
1101     }
1102 
1103     return 0;
1104 }
1105 
mvarname(int t)1106 const char *mvarname (int t)
1107 {
1108     int i;
1109 
1110     for (i=0; mvars[i].id != 0; i++) {
1111 	if (t == mvars[i].id) {
1112 	    return mvars[i].str;
1113 	}
1114     }
1115 
1116     return "unknown";
1117 }
1118 
bvar_lookup(const char * s)1119 int bvar_lookup (const char *s)
1120 {
1121     int i;
1122 
1123     for (i=0; bvars[i].id != 0; i++) {
1124 	if (!strcmp(s, bvars[i].str)) {
1125 	    return bvars[i].id;
1126 	}
1127     }
1128 
1129     return 0;
1130 }
1131 
bvarname(int t)1132 const char *bvarname (int t)
1133 {
1134     int i;
1135 
1136     for (i=0; bvars[i].id != 0; i++) {
1137 	if (t == bvars[i].id) {
1138 	    return bvars[i].str;
1139 	}
1140     }
1141 
1142     return "unknown";
1143 }
1144 
genr_function_word(const char * s)1145 int genr_function_word (const char *s)
1146 {
1147     int ret = 0;
1148 
1149     ret = real_function_lookup(s, 0, NULL);
1150     if (!ret) {
1151 	ret = dvar_lookup(s);
1152     }
1153     if (!ret) {
1154 	ret = mvar_lookup(s);
1155     }
1156     if (!ret) {
1157 	ret = bvar_lookup(s);
1158     }
1159     if (!ret) {
1160 	ret = const_lookup(s);
1161     }
1162 
1163     return ret;
1164 }
1165 
parser_ensure_error_buffer(parser * p)1166 int parser_ensure_error_buffer (parser *p)
1167 {
1168     if (p->errprn == NULL) {
1169 	p->errprn = gretl_print_new(GRETL_PRINT_BUFFER, NULL);
1170 	if (p->errprn != NULL) {
1171 	    p->prn = p->errprn;
1172 	    return 0;
1173 	} else {
1174 	    return E_ALLOC;
1175 	}
1176     }
1177 
1178     return 0;
1179 }
1180 
undefined_symbol_error(const char * s,parser * p)1181 void undefined_symbol_error (const char *s, parser *p)
1182 {
1183     parser_ensure_error_buffer(p);
1184     parser_print_input(p);
1185 
1186     if (p->ch == '.') {
1187 	pprintf(p->prn, _("%s: no such object"), s);
1188     } else {
1189 	pprintf(p->prn, _("The symbol '%s' is undefined"), s);
1190     }
1191     p->err = E_DATA;
1192 }
1193 
function_noargs_error(const char * s,parser * p)1194 static void function_noargs_error (const char *s, parser *p)
1195 {
1196     parser_ensure_error_buffer(p);
1197     parser_print_input(p);
1198 
1199     pprintf(p->prn, _("'%s': no argument was given"), s);
1200     p->err = E_ARGS;
1201 }
1202 
context_error(int c,parser * p,const char * func)1203 void context_error (int c, parser *p, const char *func)
1204 {
1205 #if LDEBUG
1206     if (func != NULL) {
1207 	fprintf(stderr, "context error in %s()\n", func);
1208     }
1209 #endif
1210     parser_ensure_error_buffer(p);
1211     if (c != 0) {
1212 	parser_print_input(p);
1213 	pprintf(p->prn, _("The symbol '%c' is not valid in this context\n"), c);
1214 	if (c == '&') {
1215 	    pputs(p->prn, _("(for logical AND, use '&&')\n"));
1216 	} else if (c == '|') {
1217 	    pputs(p->prn, _("(for logical OR, use '||')\n"));
1218 	} else if (c == ',') {
1219 	    p->err = E_PARSE;
1220 	}
1221     } else if (p->sym == EOT) {
1222 	parser_print_input(p);
1223 	pputs(p->prn, _("Incomplete expression\n"));
1224     } else {
1225 	const char *s = getsymb_full(p->sym, p);
1226 
1227 	if (s != NULL && *s != '\0' && strcmp(s, "unknown")) {
1228 	    pprintf(p->prn, _("The symbol '%s' is not valid in this context\n"),
1229 		    getsymb_full(p->sym, p));
1230 	} else {
1231 	    pprintf(p->prn, "The symbol %d is not valid in this context\n",
1232 		    p->sym);
1233 	}
1234     }
1235 
1236     if (!p->err) {
1237 	p->err = E_PARSE;
1238     }
1239 }
1240 
1241 /* @parsing_query: we want to keep track of the case
1242    where we're lexing/parsing the branches of a
1243    ternary "query" expression. When such an expression
1244    is evaluated, it's OK if the branch _not_ taken
1245    contains an undefined symbol; indeed, this can
1246    occur by design, as in
1247 
1248      scalar y = isnull(x) ? 0 : x
1249 
1250    when "x" is in fact undefined.
1251 
1252    We therefore use the "UNDEF" node type to defuse the
1253    error that would otherwise arise on parsing. An error
1254    is triggered only if the branch that references the
1255    UNDEF node is selected (attempting to evaluate an UNDEF
1256    node automatically throws an error.)
1257 */
1258 
1259 static int parsing_query;
1260 
set_parsing_query(int s)1261 void set_parsing_query (int s)
1262 {
1263     parsing_query = s;
1264 }
1265 
get_quoted_string(parser * p,int prevsym)1266 static char *get_quoted_string (parser *p, int prevsym)
1267 {
1268     char *s = NULL;
1269     int n;
1270 
1271 #if 0
1272     fprintf(stderr, "get_quoted_string: sym = '%s', prevsym '%s'\n",
1273 	    getsymb(p->sym), getsymb(prevsym));
1274 #endif
1275 
1276     /* Should backslash be taken as literal or as
1277        escape character? Depends on the context,
1278        but in exactly what way?
1279     */
1280 
1281     if (prevsym != F_SPRINTF && parsing_query) {
1282 	/* this branch revised 2020-02-06 */
1283 	n = gretl_charpos('"', p->point);
1284     } else {
1285 	/* look for a matching (non-escaped) double-quote */
1286 	n = double_quote_position(p->point);
1287 	if (n < 0) {
1288 	    /* backward compatibility */
1289 	    n = gretl_charpos('"', p->point);
1290 	}
1291     }
1292 
1293     if (n >= 0) {
1294 	s = gretl_strndup(p->point, n);
1295 	parser_advance(p, n + 1);
1296     } else {
1297 	parser_print_input(p);
1298 	pprintf(p->prn, _("Unmatched '%c'\n"), '"');
1299 	p->err = E_PARSE;
1300     }
1301 
1302     if (!p->err) {
1303 	if (p->ch == '.' && *p->point == '$') {
1304 	    /* maybe quoted name of saved model followed by
1305 	       dollar variable? */
1306 	    p->sym = MMEMB;
1307 	} else {
1308 	    p->sym = CSTR;
1309 	}
1310     }
1311 
1312     if (s != NULL && !strcmp(s, "\\\"")) {
1313 	gchar *gs;
1314 
1315 	free(s);
1316 	gs = g_strdup_printf("\"");
1317 	s = gretl_strdup(s);
1318 	free(gs);
1319     }
1320 
1321     return s;
1322 }
1323 
might_be_date_string(const char * s,int n)1324 static int might_be_date_string (const char *s, int n)
1325 {
1326     char test[12];
1327     int y, m, d;
1328 
1329 #if LDEBUG
1330     fprintf(stderr, "might_be_date_string: s='%s', n=%d\n", s, n);
1331 #endif
1332 
1333     if (n > 10) {
1334 	return 0;
1335     }
1336 
1337     *test = 0;
1338     strncat(test, s, n);
1339 
1340     if (strspn(s, "1234567890") == n) {
1341 	/* plain integer (FIXME?) */
1342 	return 1;
1343     } else if (sscanf(s, "%d:%d", &y, &m) == 2) {
1344 	/* quarterly, monthly date */
1345 	return 1;
1346     } else if (sscanf(s, "%d-%d-%d", &y, &m, &d) == 3) {
1347 	/* daily date? */
1348 	return 1;
1349     } else if (sscanf(s, "%d/%d/%d", &y, &m, &d) == 3) {
1350 	/* daily date? */
1351 	return 1;
1352     }
1353 
1354     return 0;
1355 }
1356 
obs_node(parser * p)1357 NODE *obs_node (parser *p)
1358 {
1359     NODE *ret = NULL;
1360     char word[OBSLEN + 2] = {0};
1361     const char *s = p->point - 1;
1362     int close;
1363     int special = 0;
1364     int t = -1;
1365 
1366     close = gretl_charpos(']', s);
1367 
1368 #if LDEBUG
1369     fprintf(stderr, "obs_node: s='%s', ch='%c', close=%d\n",
1370 	    s, (char) p->ch, close);
1371 #endif
1372 
1373     if (close == 0) {
1374 	pprintf(p->prn, _("Empty observation []\n"));
1375 	p->err = E_PARSE;
1376     } else if (close < 0) {
1377 	pprintf(p->prn, _("Unmatched '%c'\n"), '[');
1378 	p->err = E_PARSE;
1379     } else if (*s == '"' && close < OBSLEN + 2 &&
1380 	       gretl_charpos('"', s+1) == close - 2) {
1381 	/* quoted observation label? */
1382 	strncat(word, s, close);
1383 	special = 1;
1384     } else if (might_be_date_string(s, close)) {
1385 	strncat(word, s, close);
1386 	special = 1;
1387     }
1388 
1389     if (special && !p->err) {
1390 	t = get_t_from_obs_string(word, p->dset);
1391 	if (t >= 0) {
1392 	    /* convert to user-style 1-based index */
1393 	    t++;
1394 	}
1395     }
1396 
1397     if (t > 0) {
1398 	parser_advance(p, close - 1);
1399 	lex(p);
1400 	ret = newdbl(t);
1401     } else if (!p->err) {
1402 #if LDEBUG
1403 	fprintf(stderr, "obs_node: first try failed, going for expr\n");
1404 #endif
1405 	lex(p);
1406 	ret = expr(p);
1407     }
1408 
1409     return ret;
1410 }
1411 
is_gretl_accessor(const char * s)1412 int is_gretl_accessor (const char *s)
1413 {
1414     int i, n;
1415 
1416     for (i=0; dvars[i].id != 0; i++) {
1417 	n = strlen(dvars[i].str);
1418 	if (!strncmp(s, dvars[i].str, n)) {
1419 	    return !isalpha(s[n]);
1420 	}
1421     }
1422 
1423     for (i=0; mvars[i].id != 0; i++) {
1424 	n = strlen(mvars[i].str);
1425 	if (!strncmp(s, mvars[i].str, n)) {
1426 	    return !isalpha(s[n]);
1427 	}
1428     }
1429 
1430     for (i=0; bvars[i].id != 0; i++) {
1431 	n = strlen(bvars[i].str);
1432 	if (!strncmp(s, bvars[i].str, n)) {
1433 	    return !isalpha(s[n]);
1434 	}
1435     }
1436 
1437     return 0;
1438 }
1439 
look_up_dollar_word(const char * s,parser * p)1440 static void look_up_dollar_word (const char *s, parser *p)
1441 {
1442     char *bstr;
1443 
1444     if ((p->idnum = dvar_lookup(s)) > 0) {
1445 	p->sym = DVAR;
1446     } else if ((p->idnum = const_lookup(s)) > 0) {
1447 	p->sym = CON;
1448     } else if ((p->idnum = mvar_lookup(s)) > 0) {
1449 	p->sym = MVAR;
1450     } else if ((p->idnum = bvar_lookup(s)) > 0) {
1451 	p->sym = DBUNDLE;
1452     } else if ((bstr = get_built_in_string_by_name(s+1))) {
1453 	p->sym = CSTR;
1454 	p->idstr = gretl_strdup(bstr);
1455     } else {
1456 	undefined_symbol_error(s, p);
1457     }
1458 
1459 #if LDEBUG
1460     fprintf(stderr, "look_up_dollar_word: '%s' -> %d\n",
1461 	    s, p->idnum);
1462 #endif
1463 }
1464 
1465 #ifdef USE_RLIB
1466 # include "gretl_foreign.h"
1467 # include "libset.h"
1468 
maybe_get_R_function(const char * s)1469 static int maybe_get_R_function (const char *s)
1470 {
1471     if (libset_get_bool(R_FUNCTIONS) &&
1472 	strlen(s) >= 3 && !strncmp(s, "R.", 2)) {
1473 	return get_R_function_by_name(s + 2);
1474     } else {
1475 	return 0;
1476     }
1477 }
1478 
1479 #else /* !USE_RLIB */
1480 # define maybe_get_R_function(s) (0)
1481 #endif
1482 
1483 static int doing_genseries;
1484 
set_doing_genseries(int s)1485 void set_doing_genseries (int s)
1486 {
1487     doing_genseries = s;
1488 }
1489 
1490 /* Get the next non-space byte beyond what's already parsed:
1491    this will either be p->ch, or may be found at p->point
1492    or beyond.
1493 */
1494 
parser_next_char(parser * p)1495 static int parser_next_char (parser *p)
1496 {
1497     if (p->ch != ' ') {
1498 	return p->ch;
1499     } else {
1500 	const char *s = p->point;
1501 
1502 	while (*s) {
1503 	    if (!isspace(*s)) {
1504 		return *s;
1505 	    }
1506 	    s++;
1507 	}
1508 	return 0;
1509     }
1510 }
1511 
char_past_point(parser * p)1512 static int char_past_point (parser *p)
1513 {
1514     if (*p->point != '\0') {
1515 	int i;
1516 
1517 	for (i=1; p->point[i] != '\0'; i++) {
1518 	    if (!isspace(p->point[i])) {
1519 		return p->point[i];
1520 	    }
1521 	}
1522     }
1523     return 0;
1524 }
1525 
handle_lpnext(const char * s,parser * p,int have_dset)1526 static void handle_lpnext (const char *s, parser *p,
1527 			   int have_dset)
1528 {
1529     ufunc *u = get_user_function_by_name(s);
1530     int vnum = -1;
1531 
1532     if (have_dset) {
1533 	vnum = current_series_index(p->dset, s);
1534     }
1535 
1536     if (u == NULL && vnum >= 0) {
1537 	/* unambiguous: series */
1538 	p->sym = SERIES;
1539     } else if (u != NULL && vnum < 0) {
1540 	/* unambiguous: function */
1541 	p->sym = UFUN;
1542     } else if (u != NULL) {
1543 	/* ambiguous case! */
1544 	if (gretl_function_depth() > 0) {
1545 	    /* function writers should avoid collisions
1546 	       when naming series
1547 	    */
1548 	    p->sym = SERIES;
1549 	} else if (p->targ != UNK && p->targ != LIST && p->targ != SERIES) {
1550 	    /* target not compatible with series lag? */
1551 	    p->sym = UFUN;
1552 	} else if (defining_list(p) || p->targ == SERIES) {
1553 	    /* debatable */
1554 	    p->sym = SERIES;
1555 	} else {
1556 	    /* debatable */
1557 	    p->sym = UFUN;
1558 	}
1559     }
1560 
1561     if (p->sym != 0) {
1562 	p->idstr = gretl_strdup(s);
1563 	if (p->sym == UFUN) {
1564 	    p->data = u;
1565 	} else {
1566 	    p->idnum = vnum;
1567 	    /* in case of any intervening space */
1568 	    while (p->ch == ' ') {
1569 		parser_getc(p);
1570 	    }
1571 	}
1572     }
1573 }
1574 
is_function_word(const char * s)1575 static int is_function_word (const char *s)
1576 {
1577     return function_lookup_with_alias(s, NULL) != 0 ||
1578 	get_user_function_by_name(s) != NULL;
1579 }
1580 
look_up_word(const char * s,parser * p)1581 static void look_up_word (const char *s, parser *p)
1582 {
1583     int have_dset = (p->dset != NULL && p->dset->v > 0);
1584     int prevsym = p->sym;
1585     int lpnext, err = 0;
1586 
1587 #if LDEBUG
1588     fprintf(stderr, "look_up_word: s='%s', ch='%c', next='%c'\n",
1589 	    s, p->ch, parser_next_char(p));
1590 #endif
1591 
1592     /* is the next (or next non-space) character left paren? */
1593     lpnext = parser_next_char(p) == '(';
1594 
1595     /* initialize */
1596     p->sym = 0;
1597     p->data = NULL;
1598 
1599     if (lpnext) {
1600 	/* identifier is immediately followed by left paren:
1601 	   most likely a function call but could be the name
1602 	   of a series followed by a lag specifier
1603 	*/
1604 	p->sym = function_lookup_with_alias(s, p);
1605 	if (p->sym == 0) {
1606 	    handle_lpnext(s, p, have_dset);
1607 	} else if (p->sym == F_STACK) {
1608 	    /* special! */
1609 	    if (strstr(p->point, "--length") || strstr(p->point, "--offset")) {
1610 		stack_update_parser_input(p);
1611 	    }
1612 	    p->flags |= P_STACK;
1613 	}
1614     }
1615 
1616     if (p->sym == 0) {
1617 	p->idnum = const_lookup(s);
1618 	if (p->idnum > 0) {
1619 	    p->sym = CON;
1620 	} else {
1621 	    p->idnum = dummy_lookup(s, p);
1622 	    if (p->idnum > 0) {
1623 		p->sym = DUM;
1624 	    } else if (have_dset &&
1625 		       (p->idnum = current_series_index(p->dset, s)) >= 0) {
1626 		p->sym = SERIES;
1627 		p->idstr = gretl_strdup(s);
1628 	    } else if (have_dset && !strcmp(s, "time")) {
1629 		p->sym = DUM;
1630 		p->idnum = DUM_TREND;
1631 	    } else if ((p->data = get_user_var_by_name(s)) != NULL) {
1632 		user_var *u = p->data;
1633 
1634 		if (u->type == GRETL_TYPE_DOUBLE) {
1635 		    p->sym = NUM;
1636 		} else if (u->type == GRETL_TYPE_MATRIX) {
1637 		    p->sym = MAT;
1638 		} else if (u->type == GRETL_TYPE_BUNDLE) {
1639 		    p->sym = BUNDLE;
1640 		} else if (u->type == GRETL_TYPE_ARRAY) {
1641 		    p->sym = ARRAY;
1642 		} else if (u->type == GRETL_TYPE_STRING) {
1643 		    p->sym = STR;
1644 		} else if (u->type == GRETL_TYPE_LIST) {
1645 		    p->sym = LIST;
1646 		}
1647 		p->idstr = gretl_strdup(s);
1648 	    } else if (gretl_get_object_by_name(s)) {
1649 		p->sym = UOBJ;
1650 		p->idstr = gretl_strdup(s);
1651 	    } else if (defining_list(p) && varname_match_any(p->dset, s)) {
1652 		p->sym = WLIST;
1653 		p->idstr = gretl_strdup(s);
1654 	    } else if (have_dset && !strcmp(s, "t")) {
1655 		/* if "t" has not been otherwise defined, treat it
1656 		   as an alias for "obs"
1657 		*/
1658 		p->sym = DVAR;
1659 		p->idnum = R_INDEX;
1660 	    } else if (maybe_get_R_function(s)) {
1661 		/* note: all "native" types take precedence over this */
1662 		p->sym = RFUN;
1663 		p->idstr = gretl_strdup(s + 2);
1664 	    } else if (parsing_query || prevsym == B_AND) {
1665 		p->sym = UNDEF;
1666 		p->idstr = gretl_strdup(s);
1667 	    } else if (p->flags & P_AND) {
1668 		p->sym = UNDEF;
1669 		p->idstr = gretl_strdup(s);
1670 	    } else if (!strcmp(s, "pi")) {
1671 		/* deprecated */
1672 		p->idnum = CONST_PI;
1673 		p->sym = CON;
1674 	    } else {
1675 		err = E_UNKVAR;
1676 	    }
1677 	}
1678     }
1679 
1680     if (err) {
1681 	/* @s could be a function identifier with no
1682 	   following left paren */
1683 	if (is_function_word(s)) {
1684 	    function_noargs_error(s, p);
1685 	} else {
1686 	    undefined_symbol_error(s, p);
1687 	}
1688     }
1689 }
1690 
maybe_treat_as_postfix(parser * p)1691 static void maybe_treat_as_postfix (parser *p)
1692 {
1693     if (p->sym == NUM) {
1694 	const char *ok = ")]}+-*/%,:";
1695 	int c = char_past_point(p);
1696 
1697 	/* Interpret as foo++ or foo-- ? Only if
1698 	   the following character is suitable.
1699 	*/
1700 	if (c == 0 || strchr(ok, c)) {
1701 	    p->sym = p->ch == '+'? NUM_P : NUM_M;
1702 	    /* swallow the pluses or minuses */
1703 	    parser_advance(p, 1);
1704 	}
1705     }
1706 }
1707 
1708 #define dollar_series(t) (t > R_SCALAR_MAX && t < R_SERIES_MAX)
1709 
1710 #define could_be_matrix(t) (model_data_matrix(t) || \
1711 			    model_data_matrix_builder(t) || \
1712 			    t == M_UHAT || t == M_YHAT || \
1713 			    (t > R_SERIES_MAX && t < R_MAX))
1714 
1715 #define could_be_bundle(t) (t == R_RESULT)
1716 
word_check_next_char(parser * p)1717 static void word_check_next_char (parser *p)
1718 {
1719     char chk[2] = {p->ch, '\0'};
1720 
1721 #if LDEBUG
1722     if (p->ch) {
1723 	fprintf(stderr, "word_check_next_char: ch='%c', sym='%s'\n",
1724 		p->ch, getsymb(p->sym));
1725     } else {
1726 	fprintf(stderr, "word_check_next_char: ch = NUL\n");
1727     }
1728 #endif
1729     p->upsym = 0;
1730 
1731     /* Here we're checking for validity and syntactical implications
1732        of one of "([.+-" immediately following a 'word' of some kind.
1733     */
1734     if (strspn(chk, "([.+-") == 0) {
1735 	/* none of the above */
1736 	return;
1737     }
1738 
1739     if (p->sym == UNDEF) {
1740 	/* 2020-03-16: suspend disbelief, we might be in a branch
1741 	   of "cond ? x : y" that ends up not being taken */
1742 	return;
1743     }
1744 
1745     if (p->ch == '(') {
1746 	/* series or list (lag), or function */
1747 	if (p->sym == SERIES) {
1748 	    if (p->idnum > 0 && p->idnum == p->lh.vnum) {
1749 		p->flags |= P_AUTOREG;
1750 	    }
1751 	    p->upsym = p->sym;
1752 	    p->sym = LAG;
1753 	} else if (p->sym == LIST) {
1754 	    p->upsym = p->sym;
1755 	    p->sym = LAG;
1756 	} else if (p->sym == MVAR && model_data_matrix(p->idnum)) {
1757 	    /* old-style "$coeff(x1)" etc. */
1758 	    p->sym = DMSTR;
1759 	} else if (!func1_symb(p->sym) &&
1760 		   !func2_symb(p->sym) &&
1761 		   !func3_symb(p->sym) &&
1762 		   !funcn_symb(p->sym) &&
1763 		   p->sym != UFUN &&
1764 		   p->sym != RFUN) {
1765 	    p->err = E_PARSE;
1766 	}
1767     } else if (p->ch == '[') {
1768 	p->upsym = p->sym;
1769 	if (p->sym == MAT) {
1770 	    /* slice of user matrix */
1771 	    p->sym = MSL;
1772 	} else if (*p->point != '"' &&
1773 		   (p->sym == MVAR || p->sym == DVAR) &&
1774 		   could_be_matrix(p->idnum)) {
1775 	    /* slice of $-matrix? */
1776 	    p->sym = MSL;
1777 	} else if (p->sym == SERIES) {
1778 	    /* observation from series */
1779 	    p->sym = OBS;
1780 	} else if (p->sym == DVAR && dollar_series(p->idnum)) {
1781 	    /* observation from "dollar" series */
1782 	    p->sym = OBS;
1783 	} else if (p->sym == ARRAY || p->sym == LIST || p->sym == STR) {
1784 	    /* element or range of array, list or string */
1785 	    p->sym = OSL;
1786 	} else if (p->sym == MVAR && model_data_list(p->idnum)) {
1787 	    /* element/range of accessor list */
1788 	    p->sym = OSL;
1789 	} else if (p->sym == BUNDLE) {
1790 	    /* member from bundle */
1791 	    p->sym = BMEMB;
1792 	} else if (p->sym == DBUNDLE) {
1793 	    /* member from $ bundle */
1794 	    p->sym = DBMEMB;
1795 	} else if (p->sym == DVAR && could_be_bundle(p->idnum)) {
1796 	    p->sym = DBMEMB;
1797 	} else {
1798 	    p->err = E_PARSE;
1799 	}
1800     } else if (p->ch == '.' && *p->point == '$') {
1801 	if (p->sym == UOBJ) {
1802 	    /* name of saved model followed by dollar variable? */
1803 	    p->sym = MMEMB;
1804 	} else if (p->sym == CSTR) {
1805 	    /* maybe quoted name of saved object followed by
1806 	       dollar variable? */
1807 	    p->sym = MMEMB;
1808 	} else {
1809 	    p->err = E_PARSE;
1810 	}
1811     } else if (p->ch == '.' && isalpha(*p->point)) {
1812 	if (p->sym == LIST) {
1813 	    p->sym = LISTVAR;
1814 	} else if (p->sym == BUNDLE) {
1815 	    p->sym = BMEMB;
1816 	} else if (p->sym == DBUNDLE) {
1817 	    p->sym = DBMEMB;
1818 	} else if (p->sym == DVAR && could_be_bundle(p->idnum)) {
1819 	    p->sym = DBMEMB;
1820 	} else {
1821 	    p->err = E_PARSE;
1822 	}
1823     } else if (p->ch == '+' && *p->point == '+') {
1824 	maybe_treat_as_postfix(p);
1825     } else if (p->ch == '-' && *p->point == '-') {
1826 	maybe_treat_as_postfix(p);
1827     }
1828 
1829     if (p->err) {
1830 	context_error(p->ch, p, "word_check_next_char");
1831     }
1832 }
1833 
is_word_char(parser * p)1834 static int is_word_char (parser *p)
1835 {
1836     if (strchr(wordchars, p->ch) != NULL) {
1837 	return 1;
1838     } else if (defining_list(p) && !doing_genseries &&
1839 	       (p->ch == '*' || p->ch == '?')) {
1840 	return 1;
1841     }
1842 
1843     return 0;
1844 }
1845 
getword(parser * p,int greek)1846 static void getword (parser *p, int greek)
1847 {
1848     char word[32];
1849     int i = 0;
1850 
1851     if (greek) {
1852 	/* we have a single (2-byte) UTF-8 greek letter in scope */
1853 	for (i=0; i<2; i++) {
1854 	    word[i] = p->ch;
1855 	    parser_getc(p);
1856 	}
1857     } else {
1858 	/* we know the first char is acceptable (and might be '$' or '_') */
1859 	word[i++] = p->ch;
1860 	parser_getc(p);
1861 
1862 #ifdef USE_RLIB
1863 	/* allow for R.foo function namespace */
1864 	if (*word == 'R' && p->ch == '.' && *p->point != '$') {
1865 	    if (libset_get_bool(R_FUNCTIONS) && !gretl_is_bundle("R")) {
1866 		word[i++] = p->ch;
1867 		parser_getc(p);
1868 	    }
1869 	}
1870 #endif
1871 	while (p->ch != 0 && is_word_char(p) && i < 31) {
1872 	    word[i++] = p->ch;
1873 	    parser_getc(p);
1874 	}
1875     }
1876 
1877     word[i] = '\0';
1878 
1879 #if LDEBUG
1880     fprintf(stderr, "getword: word = '%s'\n", word);
1881 #endif
1882 
1883     while (p->ch != 0 && strchr(wordchars, p->ch) != NULL) {
1884 	/* flush excess word characters */
1885 	parser_getc(p);
1886     }
1887 
1888     if (p->flags & P_GETSTR) {
1889 	/* uninterpreted string wanted */
1890 	p->sym = CSTR;
1891 	p->idstr = gretl_strdup(word);
1892 	p->flags ^= P_GETSTR;
1893 	return; /* FIXME bundle-member name */
1894     } else if ((*word == '$' && word[1]) || !strcmp(word, "obs")) {
1895 	look_up_dollar_word(word, p);
1896     } else if (*word == '$' && word[1] == '\0' && (p->ch == '[' || p->ch == '.')) {
1897 	p->sym = DBUNDLE;
1898 	p->idnum = B_MODEL;
1899     } else {
1900 	look_up_word(word, p);
1901     }
1902 
1903     if (!p->err) {
1904 	word_check_next_char(p);
1905     }
1906 
1907 #if LDEBUG
1908     fprintf(stderr, "getword: p->err = %d\n", p->err);
1909 #endif
1910 }
1911 
colon_ok(parser * p,char * s,int n)1912 static int colon_ok (parser *p, char *s, int n)
1913 {
1914     int i;
1915 
1916     if (p->flags & P_SLICING) {
1917 	/* calculating a matrix "slice": colon is a separator in
1918 	   this context, cannot be part of a time/panel observation
1919 	   string
1920 	*/
1921 #if LDEBUG
1922 	fprintf(stderr, "colon_ok: doing matrix slice\n");
1923 #endif
1924 	return 0;
1925     }
1926 
1927     if (n != 1 && n != 3) {
1928 	return 0;
1929     }
1930 
1931     for (i=0; i<=n; i++) {
1932 	if (!isdigit(s[i])) {
1933 	    return 0;
1934 	}
1935     }
1936 
1937     return 1;
1938 }
1939 
1940 /* Below: we're testing p->ch for validity, given what we've already
1941    packed into string @s up to element @i, and with some regard to
1942    the next character in line.
1943 */
1944 
ok_dbl_char(parser * p,char * s,int i)1945 static int ok_dbl_char (parser *p, char *s, int i)
1946 {
1947     int ret = 0;
1948 
1949     if (i < 0 || (p->ch >= '0' && p->ch <= '9')) {
1950 	return 1;
1951     }
1952 
1953     switch (p->ch) {
1954     case '+':
1955     case '-':
1956 	ret = s[i] == 'e' || s[i] == 'E';
1957 	break;
1958     case '.':
1959 	ret = !strchr(s, '.') && !strchr(s, ':') &&
1960 	    !strchr(s, 'e') && !strchr(s, 'E') &&
1961 	    *p->point != '.';
1962 	break;
1963     case 'e':
1964     case 'E':
1965 	ret = !strchr(s, 'e') && !strchr(s, 'E') &&
1966 	    !strchr(s, ':');
1967 	break;
1968     case ':':
1969 	/* allow for obs numbers in the form, e.g., "1995:10" */
1970 	ret = colon_ok(p, s, i);
1971 	break;
1972     default:
1973 	break;
1974     }
1975 
1976     return ret;
1977 }
1978 
parse_number(parser * p)1979 static void parse_number (parser *p)
1980 {
1981     char xstr[NUMLEN] = {0};
1982     int gotcol = 0;
1983     int i = 0;
1984 
1985     while (ok_dbl_char(p, xstr, i - 1) && i < NUMLEN - 1) {
1986 	xstr[i++] = p->ch;
1987 	if (p->ch == ':') {
1988 	    gotcol = 1;
1989 	}
1990 	parser_getc(p);
1991     }
1992 
1993     while (p->ch >= '0' && p->ch <= '9') {
1994 	/* flush excess numeric characters */
1995 	parser_getc(p);
1996     }
1997 
1998 #if LDEBUG
1999     fprintf(stderr, "parse_number: xstr = '%s'\n", xstr);
2000 #endif
2001 
2002     if (gotcol) {
2003 #if LDEBUG
2004 	fprintf(stderr, " got colon: obs identifier?\n");
2005 #endif
2006 	if (p->dset == NULL || p->dset->n == 0) {
2007 	    p->err = E_NODATA;
2008 	} else if (p->dset->pd == 1) {
2009 	    p->err = E_PDWRONG;
2010 	} else if (dateton(xstr, p->dset) < 0) {
2011 	    p->err = E_DATA;
2012 	} else {
2013 	    p->idstr = gretl_strdup(xstr);
2014 	    p->sym = CSTR;
2015 	}
2016     } else {
2017 	p->xval = dot_atof(xstr);
2018 	p->sym = CNUM;
2019 #if LDEBUG
2020 	fprintf(stderr, " dot_atof gave %g\n", p->xval);
2021 #endif
2022     }
2023 }
2024 
wildcard_special(parser * p)2025 static int wildcard_special (parser *p)
2026 {
2027     char cprev = *(p->point - 2);
2028 
2029     if (p->ch == '?') {
2030 	char cnext = *p->point;
2031 
2032 	if ((cprev == ' ' || cprev == ')') && cnext == ' ') {
2033 	    /* '?' is presumably ternary operator */
2034 	    return 0;
2035 	}
2036     }
2037 
2038     if (cprev == ' ' &&
2039 	(bare_data_type(p->sym) || closing_sym(p->sym) ||
2040 	 (p->sym == LAG))) {
2041 	p->sym = B_LCAT;
2042     } else {
2043 	getword(p, 0);
2044     }
2045 
2046     return 1;
2047 }
2048 
2049 /* 0xE2 0x88 0x92 = UTF-8 minus */
2050 
lex_try_utf8(parser * p)2051 static void lex_try_utf8 (parser *p)
2052 {
2053     if ((unsigned char) *p->point == 0x88 &&
2054 	(unsigned char) *(p->point + 1) == 0x92) {
2055 	p->sym = B_SUB;
2056 	parser_getc(p);
2057 	parser_getc(p);
2058 	parser_getc(p);
2059     } else {
2060 	pprintf(p->prn, _("Unexpected byte 0x%x\n"),
2061 		(unsigned char) p->ch);
2062 	p->err = E_PARSE;
2063     }
2064 }
2065 
2066 #define word_start_special(c) (c == '$' || c == '_')
2067 
2068 /* accept 'to', but only with spaces before and after */
2069 #define lag_range_sym(p) ((p->flags & P_LAGPRSE) && p->ch == 't' && \
2070                           *p->point == 'o' && \
2071 			  *(p->point - 2) == ' ' && \
2072 			  *(p->point + 1) == ' ')
2073 
lex(parser * p)2074 void lex (parser *p)
2075 {
2076     static int prevsyms[2];
2077 
2078 #if LDEBUG
2079     if (p->ch) {
2080 	fprintf(stderr, "lex: p->ch='%c', point='%c'\n", p->ch, *p->point);
2081     } else {
2082 	fprintf(stderr, "lex: p->ch is NUL\n");
2083     }
2084 #endif
2085     prevsyms[0] = prevsyms[1];
2086     prevsyms[1] = p->sym;
2087 
2088     if (p->ch == 0) {
2089 	p->sym = EOT;
2090 	return;
2091     }
2092 
2093     while (p->ch != 0) {
2094 	if ((unsigned char) p->ch == 0xE2) {
2095 	    lex_try_utf8(p);
2096 	    return;
2097 	} else if (is_greek_letter(p->point - 1)) {
2098 	    getword(p, 1);
2099 	    return;
2100 	}
2101 	switch (p->ch) {
2102 	case ' ':
2103 	case '\t':
2104 	case '\r':
2105         case '\n':
2106 	    parser_getc(p);
2107 	    break;
2108         case '+':
2109 	    p->sym = B_ADD;
2110 	    parser_getc(p);
2111 	    return;
2112         case '-':
2113 	    p->sym = B_SUB;
2114 	    parser_getc(p);
2115 	    return;
2116         case '*':
2117 	    if (defining_list(p) && !doing_genseries) {
2118 		/* allow for '*' as wildcard */
2119 		wildcard_special(p);
2120 		return;
2121 	    }
2122 	    parser_getc(p);
2123 	    if (p->ch == '*') {
2124 		p->sym = B_KRON;
2125 		parser_getc(p);
2126 	    } else {
2127 		p->sym = B_MUL;
2128 	    }
2129 	    return;
2130 	case '\'':
2131 	    p->sym = B_TRMUL;
2132 	    parser_getc(p);
2133 	    return;
2134         case '/':
2135 	    p->sym = B_DIV;
2136 	    parser_getc(p);
2137 	    return;
2138         case '\\':
2139 	    p->sym = B_LDIV;
2140 	    parser_getc(p);
2141 	    return;
2142         case '%':
2143 	    p->sym = B_MOD;
2144 	    parser_getc(p);
2145 	    return;
2146         case '^':
2147 	    p->sym = B_POW;
2148 	    parser_getc(p);
2149 	    return;
2150         case '&':
2151 	    parser_getc(p);
2152 	    if (p->ch == '&') {
2153 		p->sym = B_AND;
2154 		parser_getc(p);
2155 	    } else {
2156 		p->sym = U_ADDR;
2157 	    }
2158 	    return;
2159         case '|':
2160 	    parser_getc(p);
2161 	    if (p->ch == '|') {
2162 		p->sym = B_OR;
2163 		parser_getc(p);
2164 	    } else {
2165 		p->sym = B_VCAT;
2166 	    }
2167 	    return;
2168         case '!':
2169 	    parser_getc(p);
2170 	    if (p->ch == '=') {
2171 		p->sym = B_NEQ;
2172 		parser_getc(p);
2173 	    } else {
2174 		p->sym = U_NOT;
2175 	    }
2176 	    return;
2177         case '=':
2178 	    parser_getc(p);
2179 	    if (p->ch == '=') {
2180 		parser_getc(p);
2181 		p->sym = B_EQ;
2182 	    } else {
2183 		gretl_errmsg_set(_("If you meant to test for "
2184 				   "equality, please use '=='"));
2185 		p->err = E_PARSE;
2186 	    }
2187 	    return;
2188         case '>':
2189 	    parser_getc(p);
2190 	    if (p->ch == '=') {
2191 		p->sym = B_GTE;
2192 		parser_getc(p);
2193 	    } else {
2194 		p->sym = B_GT;
2195 	    }
2196 	    return;
2197         case '<':
2198 	    parser_getc(p);
2199 	    if (p->ch == '=') {
2200 		p->sym = B_LTE;
2201 		parser_getc(p);
2202 	    } else if (p->ch == '>') {
2203 		p->sym = B_NEQ;
2204 		parser_getc(p);
2205 	    } else {
2206 		p->sym = B_LT;
2207 	    }
2208 	    return;
2209         case '(':
2210 	    p->sym = G_LPR;
2211 	    parser_getc(p);
2212 	    return;
2213         case ')':
2214 	    p->sym = G_RPR;
2215 	    parser_getc(p);
2216 	    return;
2217         case '[':
2218 	    p->sym = G_LBR;
2219 	    parser_getc(p);
2220 	    return;
2221         case '{':
2222 	    p->sym = G_LCB;
2223 	    parser_getc(p);
2224 	    return;
2225         case '}':
2226 	    p->sym = G_RCB;
2227 	    parser_getc(p);
2228 	    return;
2229         case ']':
2230 	    p->sym = G_RBR;
2231 	    parser_getc(p);
2232 	    return;
2233         case '~':
2234 	    p->sym = B_HCAT;
2235 	    parser_getc(p);
2236 	    return;
2237         case ',':
2238 	    p->sym = P_COM;
2239 	    parser_getc(p);
2240 	    return;
2241         case ';':
2242 	    if (defining_list(p)) {
2243 		p->sym = B_JOIN;
2244 	    } else {
2245 		/* used in matrix definition */
2246 		p->sym = P_SEMI;
2247 	    }
2248 	    parser_getc(p);
2249 	    return;
2250         case ':':
2251 	    p->sym = P_COL;
2252 	    parser_getc(p);
2253 	    return;
2254         case '?':
2255 	    if (defining_list(p) && !doing_genseries) {
2256 		/* allow for '?' as wildcard */
2257 		if (wildcard_special(p)) {
2258 		    return;
2259 		}
2260 	    }
2261 	    p->sym = QUERY;
2262 	    parser_getc(p);
2263 	    return;
2264 	case '.':
2265 	    if (*p->point == '$') {
2266 		p->sym = P_DOT;
2267 		parser_getc(p);
2268 		return;
2269 	    } else if (isalpha(*p->point)) {
2270 		/* 2017-01-07 */
2271 		p->sym = BMEMB;
2272 		parser_getc(p);
2273 		return;
2274 	    }
2275 	    parser_getc(p);
2276 	    if (p->ch == '*') {
2277 		p->sym = B_DOTMULT;
2278 		parser_getc(p);
2279 		return;
2280 	    } else if (p->ch == '/') {
2281 		p->sym = B_DOTDIV;
2282 		parser_getc(p);
2283 		return;
2284 	    } else if (p->ch == '^') {
2285 		p->sym = B_DOTPOW;
2286 		parser_getc(p);
2287 		return;
2288 	    } else if (p->ch == '+') {
2289 		p->sym = B_DOTADD;
2290 		parser_getc(p);
2291 		return;
2292 	    } else if (p->ch == '-') {
2293 		p->sym = B_DOTSUB;
2294 		parser_getc(p);
2295 		return;
2296 	    } else if (p->ch == '=') {
2297 		p->sym = B_DOTEQ;
2298 		parser_getc(p);
2299 		return;
2300 	    } else if (p->ch == '>') {
2301 		p->sym = B_DOTGT;
2302 		parser_getc(p);
2303 		if (p->ch == '=') {
2304 		    p->sym = B_DOTGTE;
2305 		    parser_getc(p);
2306 		}
2307 		return;
2308 	    } else if (p->ch == '<') {
2309 		p->sym = B_DOTLT;
2310 		parser_getc(p);
2311 		if (p->ch == '=') {
2312 		    p->sym = B_DOTLTE;
2313 		    parser_getc(p);
2314 		}
2315 		return;
2316 	    } else if (p->ch == '!' && *p->point == '=') {
2317 		p->sym = B_DOTNEQ;
2318 		parser_getc(p);
2319 		parser_getc(p);
2320 		return;
2321 	    } else if (p->ch == '.') {
2322 		p->sym = B_ELLIP;
2323 		parser_getc(p);
2324 		return;
2325 	    } else {
2326 		/* not a "dot operator", so back up */
2327 		parser_ungetc(p);
2328 	    }
2329 	    /* Falls through. */
2330         default:
2331 	    if (defining_list(p) && lag_range_sym(p)) {
2332 		p->sym = B_RANGE;
2333 		parser_getc(p);
2334 		parser_getc(p);
2335 		return;
2336 	    }
2337 	    if (defining_list(p) && !doing_genseries &&
2338 		(bare_data_type(p->sym) || closing_sym(p->sym) ||
2339 		 p->sym == LAG) && *(p->point - 2) == ' ') {
2340 		/* may be forming a list, but only if there are
2341 		   spaces between the terms
2342 		*/
2343 		p->sym = B_LCAT;
2344 		return;
2345 	    }
2346 	    if (isdigit(p->ch)) {
2347 		parse_number(p);
2348 		return;
2349 	    } else if (p->ch == '.' && isdigit(*p->point)) {
2350 		parse_number(p);
2351 		return;
2352 	    } else if (islower(p->ch) || isupper(p->ch) ||
2353 		       word_start_special(p->ch)) {
2354 		getword(p, 0);
2355 		return;
2356 	    } else if (p->ch == '"') {
2357 		p->idstr = get_quoted_string(p, prevsyms[0]);
2358 		return;
2359 	    } else {
2360 		parser_print_input(p);
2361 		if (isprint(p->ch)) {
2362 		    pprintf(p->prn, _("Invalid character '%c'\n"), p->ch);
2363 		} else {
2364 		    pprintf(p->prn, _("Unexpected byte 0x%x\n"),
2365 			    (unsigned char) p->ch);
2366 		}
2367 		p->err = E_PARSE;
2368 		return;
2369 	    }
2370 	} /* end ch switch */
2371     } /* end while ch != 0 */
2372 }
2373 
getsymb_full(int t,const parser * p)2374 const char *getsymb_full (int t, const parser *p)
2375 {
2376     if ((t > F1_MIN && t < F1_MAX) ||
2377 	(t > F1_MAX && t < F2_MAX) ||
2378 	(t > F2_MAX && t < FN_MAX)) {
2379 	return funname(t);
2380     }
2381 
2382     if (t == EOT) {
2383 	return "EOT";
2384     }
2385 
2386     /* yes, well */
2387     if (t == OBS) {
2388 	return "OBS";
2389     } else if (t == MSL) {
2390 	return "MSL";
2391     } else if (t == OSL) {
2392 	return "OSL";
2393     } else if (t == SUB_ADDR) {
2394 	return "SUB_ADDR";
2395     } else if (t == DMSTR) {
2396 	return "DMSTR";
2397     } else if (t == SLRAW) {
2398 	return "SLRAW";
2399     } else if (t == MSPEC) {
2400 	return "MSPEC";
2401     } else if (t == SUBSL) {
2402 	return "SUBSL";
2403     } else if (t == MDEF) {
2404 	return "MDEF";
2405     } else if (t == FARGS) {
2406 	return "FARGS";
2407     } else if (t == LIST || t == WLIST) {
2408 	return "LIST";
2409     } else if (t == EMPTY) {
2410 	return "EMPTY";
2411     } else if (t == LISTVAR) {
2412 	return "LISTVAR";
2413     } else if (t == BMEMB) {
2414 	return "BMEMB";
2415     } else if (t == SERIES) {
2416 	return "SERIES";
2417     } else if (t == MAT) {
2418 	return "MAT";
2419     } else if (t == UNDEF) {
2420 	return "UNDEF";
2421     } else if (t == NUM) {
2422 	return "NUM";
2423     } else if (t == CNUM) {
2424 	return "CNUM";
2425     } else if (t == IVEC) {
2426 	return "IVEC";
2427     } else if (t == NUM_P) {
2428 	return "NUM_P";
2429     } else if (t == NUM_M) {
2430 	return "NUM_M";
2431     } else if (t == DBUNDLE) {
2432 	return "DBUNDLE";
2433     } else if (t == DBMEMB) {
2434 	return "DBMEMB";
2435     } else if (t == DBMEMB) {
2436 	return "MMEMB";
2437     } else if (t == PTR) {
2438 	return "PTR";
2439     }
2440 
2441     if (p != NULL) {
2442 	if (t == BUNDLE) {
2443 	    return p->idstr;
2444 	} else if (t == ARRAY) {
2445 	    return p->idstr;
2446 	} else if (t == UOBJ) {
2447 	    return p->idstr;
2448 	} else if (t == CON) {
2449 	    return constname(p->idnum);
2450 	} else if (t == DUM) {
2451 	    return dumname(p->idnum);
2452 	} else if (t == DVAR) {
2453 	    return dvarname(p->idnum);
2454 	} else if (t == MVAR) {
2455 	    return mvarname(p->idnum);
2456 	} else if (t == UFUN || t == RFUN) {
2457 	    return p->idstr;
2458 	} else if (t == STR || t == CSTR) {
2459 	    return p->idstr;
2460 	}
2461     } else {
2462 	if (t == BUNDLE) {
2463 	    return "BUNDLE";
2464 	} else if (t == ARRAY) {
2465 	    return "ARRAY";
2466 	} else if (t == UOBJ) {
2467 	    return "UOBJ";
2468 	} else if (t == CON) {
2469 	    return "CON";
2470 	} else if (t == DUM) {
2471 	    return "dummy constant";
2472 	} else if (t == DVAR) {
2473 	    return "DVAR";
2474 	} else if (t == MVAR) {
2475 	    return "MVAR";
2476 	} else if (t == UFUN) {
2477 	    return "UFUN";
2478 	} else if (t == RFUN) {
2479 	    return "RFUN";
2480 	} else if (t == STR) {
2481 	    return "STR";
2482 	} else if (t == CSTR) {
2483 	    return "CSTR";
2484 	}
2485     }
2486 
2487     switch (t) {
2488     case B_ASN:
2489 	return "=";
2490     case B_ADD:
2491     case U_POS:
2492 	return "+";
2493     case B_SUB:
2494     case U_NEG:
2495 	return "-";
2496     case B_MUL:
2497 	return "*";
2498     case B_TRMUL:
2499 	return "'";
2500     case B_DIV:
2501 	return "/";
2502     case B_LDIV:
2503 	return "\\";
2504     case B_MOD:
2505 	return "%";
2506     case B_POW:
2507 	return "^";
2508     case B_EQ:
2509 	return "==";
2510     case B_NEQ:
2511 	return "!=";
2512     case B_GT:
2513 	return ">";
2514     case B_LT:
2515 	return "<";
2516     case B_GTE:
2517 	return ">=";
2518     case B_LTE:
2519 	return "<=";
2520     case B_AND:
2521 	return "&&";
2522     case B_JOIN:
2523 	return "JOIN";
2524     case B_RANGE:
2525 	return " to ";
2526     case B_ELLIP:
2527 	return "..";
2528     case U_ADDR:
2529 	return "&";
2530     case B_OR:
2531 	return "||";
2532     case U_NOT:
2533 	return "!";
2534     case G_LPR:
2535 	return "(";
2536     case G_RPR:
2537 	return ")";
2538     case G_LBR:
2539 	return "[";
2540     case G_RBR:
2541 	return "]";
2542     case G_LCB:
2543 	return "{";
2544     case G_RCB:
2545 	return "}";
2546     case B_DOTMULT:
2547 	return ".*";
2548     case B_DOTDIV:
2549 	return "./";
2550     case B_DOTPOW:
2551 	return ".^";
2552     case B_DOTADD:
2553 	return ".+";
2554     case B_DOTSUB:
2555 	return ".-";
2556     case B_DOTEQ:
2557 	return ".=";
2558     case B_DOTGT:
2559 	return ".>";
2560     case B_DOTLT:
2561 	return ".<";
2562     case B_DOTGTE:
2563 	return ".>=";
2564     case B_DOTLTE:
2565 	return ".<=";
2566     case B_DOTNEQ:
2567 	return ".!=";
2568     case B_DOTASN:
2569 	return "dot-assign";
2570     case B_KRON:
2571 	return "**";
2572     case B_HCAT:
2573 	return "~";
2574     case B_VCAT:
2575 	return "|";
2576     case B_LCAT:
2577 	return "LCAT";
2578     case P_COM:
2579 	return ",";
2580     case P_DOT:
2581 	return ".";
2582     case P_SEMI:
2583 	return ";";
2584     case P_COL:
2585 	return ":";
2586     case QUERY:
2587 	return "query";
2588     case LAG:
2589 	return "lag";
2590     default:
2591 	break;
2592     }
2593 
2594     return "unknown";
2595 }
2596 
getsymb(int t)2597 const char *getsymb (int t)
2598 {
2599     return getsymb_full(t, NULL);
2600 }
2601