1 /*
2 * gretl -- Gnu Regression, Econometrics and Time-series Library
3 * Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 /* lexer module for 'genr' and related commands */
21
22 #include "genparse.h"
23 #include "gretl_func.h"
24 #include "uservar.h"
25 #include "gretl_string_table.h"
26 #include "gretl_normal.h"
27 #include "gretl_bundle.h"
28 #include "uservar_priv.h"
29
30 #define NUMLEN 32
31 #define MAXQUOTE 64
32
33 #if GENDEBUG
34 # define LDEBUG 1
35 #else
36 # define LDEBUG 0
37 #endif
38
39 static int parser_next_char (parser *p);
40
41 #define defining_list(p) (p->flags & P_LISTDEF)
42
43 #define bare_data_type(s) (s > PUNCT_MAX && s < DTYPE_MAX)
44
45 #define closing_sym(s) (s == G_RPR || s == G_RBR || s == G_RCB)
46
47 const char *wordchars = "abcdefghijklmnopqrstuvwxyz"
48 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
49 "0123456789_";
50
51 struct str_table {
52 int id;
53 const char *str;
54 };
55
56 struct str_table_ex {
57 int id;
58 const char *str;
59 void *ptr;
60 };
61
62 struct str_table consts[] = {
63 { CONST_PI, "$pi" },
64 { CONST_NA, "NA" },
65 { CONST_INF, "$inf" },
66 { CONST_NAN, "$nan" },
67 { CONST_WIN32, "WIN32" },
68 { CONST_EPS, "$macheps" },
69 { CONST_HAVE_MPI, "$havempi" },
70 { CONST_MPI_RANK, "$mpirank" },
71 { CONST_MPI_SIZE, "$mpisize" },
72 { CONST_N_PROC, "$nproc" },
73 { CONST_TRUE, "TRUE" },
74 { CONST_FALSE, "FALSE" },
75 { 0, NULL }
76 };
77
78 struct str_table dummies[] = {
79 { DUM_NULL, "null" },
80 { DUM_DIAG, "diag" },
81 { DUM_UPPER, "upper" },
82 { DUM_LOWER, "lower" },
83 { DUM_REAL, "real" },
84 { DUM_IMAG, "imag" },
85 { DUM_END, "end" },
86 { DUM_DATASET, "dataset" },
87 { 0, NULL }
88 };
89
90 /* Identify matrix-selection dummy constants:
91 these can be valid only between '[' and ']'.
92 */
93 #define MSEL_DUM(d) (d >= DUM_DIAG && d <= DUM_END)
94
95 /* dvars: dataset- and test-related accessors */
96
97 struct str_table dvars[] = {
98 { R_NOBS, "$nobs" },
99 { R_NVARS, "$nvars" },
100 { R_PD, "$pd" },
101 { R_T1, "$t1" },
102 { R_T2, "$t2" },
103 { R_TMAX, "$tmax" },
104 { R_DATATYPE, "$datatype" },
105 { R_TEST_STAT, "$test" },
106 { R_TEST_PVAL, "$pvalue" },
107 { R_TEST_BRK, "$qlrbreak" },
108 { R_TEST_LNL, "$rlnl" },
109 { R_STOPWATCH, "$stopwatch" },
110 { R_PUNIT, "$unit" },
111 { R_OBSMAJ, "$obsmajor" },
112 { R_OBSMIN, "$obsminor" },
113 { R_OBSMIC, "$obsmicro" },
114 { R_DATES, "$obsdate" },
115 { R_WINDOWS, "$windows" },
116 { R_VERSION, "$version" },
117 { R_ERRNO, "$error" },
118 { R_SEED, "$seed" },
119 { R_HUGE, "$huge" },
120 { R_NOW, "$now" },
121 { R_RESULT, "$result" },
122 { R_PNGFONT, "$pngfont" },
123 { R_MAPFILE, "$mapfile" },
124 { R_MAP, "$map" },
125 { R_INDEX, "obs" },
126 { R_LOGLEVEL, "$loglevel" },
127 { R_LOGSTAMP, "$logstamp" },
128 { 0, NULL },
129 };
130
131 /* mvars: model-related accessors */
132
133 struct str_table mvars[] = {
134 { M_ESS, "$ess" },
135 { M_T, "$T" },
136 { M_RSQ, "$rsq" },
137 { M_SIGMA, "$sigma" },
138 { M_DF, "$df" },
139 { M_NCOEFF, "$ncoeff" },
140 { M_LNL, "$lnl" },
141 { M_GMMCRIT, "$gmmcrit" },
142 { M_AIC, "$aic" },
143 { M_BIC, "$bic" },
144 { M_HQC, "$hqc" },
145 { M_TRSQ, "$trsq" },
146 { M_DW, "$dw" },
147 { M_DWPVAL, "$dwpval" },
148 { M_FSTT, "$Fstat" },
149 { M_CHISQ, "$chisq" },
150 { M_DIAGTEST, "$diagtest" },
151 { M_DIAGPVAL, "$diagpval" },
152 { M_PMANTEAU, "$pmanteau" },
153 { M_UHAT, "$uhat" },
154 { M_YHAT, "$yhat" },
155 { M_LLT, "$llt" },
156 { M_AHAT, "$ahat" },
157 { M_SAMPLE, "$sample" },
158 { M_H, "$h" },
159 { M_COEFF, "$coeff" },
160 { M_SE, "$stderr" },
161 { M_VCV, "$vcv" },
162 { M_RHO, "$rho" },
163 { M_COMPAN, "$compan" },
164 { M_XTXINV, "$xtxinv" },
165 { M_VECG, "$vecGamma" },
166 { M_VMA, "$vma" },
167 { M_FEVD, "$fevd" },
168 { M_EVALS, "$evals" },
169 { M_JALPHA, "$jalpha" },
170 { M_JBETA, "$jbeta" },
171 { M_JVBETA, "$jvbeta" },
172 { M_JS00, "$s00" },
173 { M_JS11, "$s11" },
174 { M_JS01, "$s01" },
175 { M_EC, "$ec" },
176 { M_HAUSMAN, "$hausman" },
177 { M_SARGAN, "$sargan" },
178 { M_SYSGAM, "$sysGamma" },
179 { M_SYSA, "$sysA" },
180 { M_SYSB, "$sysB" },
181 { M_FCAST, "$fcast" },
182 { M_FCSE, "$fcse" },
183 { M_COEFF_CI,"$coeff_ci" },
184 { M_EHAT, "$ehat" },
185 { M_MNLPROBS, "$mnlprobs" },
186 { M_XLIST, "$xlist" },
187 { M_YLIST, "$ylist" },
188 { M_COMMAND, "$command" },
189 { M_DEPVAR, "$depvar" },
190 { M_PARNAMES, "$parnames" },
191 { 0, NULL }
192 };
193
194 /* bvars: bundle accessors */
195
196 struct str_table bvars[] = {
197 { B_MODEL, "$model" },
198 { B_SYSTEM, "$system" },
199 { B_SYSINFO, "$sysinfo" },
200 { 0, NULL }
201 };
202
203 /* Below, @ptrfuncs: table of functions for which we wish to
204 attach function-pointers to the relevant NODE. Nota bene:
205 it's crucial that no function in @ptrfuncs is also listed
206 in @funcs below!
207
208 The order of function symbols in @ptrfuncs need not match
209 the order in which they're listed in genparse.h, but it's
210 crucial that every function with ID number <= FP_MAX has
211 an entry in @ptrfuncs.
212
213 "Crucial" -> certain crash on calling wrongly classified
214 function!
215 */
216
217 struct str_table_ex ptrfuncs[] = {
218 { F_ABS, "abs", fabs },
219 { F_CEIL, "ceil", ceil },
220 { F_FLOOR, "floor", floor },
221 { F_SIN, "sin", sin },
222 { F_COS, "cos", cos },
223 { F_TAN, "tan", tan },
224 { F_ASIN, "asin", asin },
225 { F_ACOS, "acos", acos },
226 { F_ATAN, "atan", atan },
227 { F_SINH, "sinh", sinh },
228 { F_COSH, "cosh", cosh },
229 { F_TANH, "tanh", tanh },
230 { F_ASINH, "asinh", asinh },
231 { F_ACOSH, "acosh", acosh },
232 { F_ATANH, "atanh", atanh },
233 { F_LOG, "log", log },
234 { F_LOG10, "log10", log10 },
235 { F_LOG2, "log2", log2 },
236 { F_EXP, "exp", exp },
237 { F_SQRT, "sqrt", sqrt },
238 { F_GAMMA, "gammafun", gammafun },
239 { F_LNGAMMA, "lngamma", lngamma },
240 { F_DIGAMMA, "digamma", digamma },
241 { F_TRIGAMMA, "trigamma", trigamma },
242 { F_INVMILLS, "invmills", invmills },
243 { F_ROUND, "round", gretl_round },
244 { F_SGN, "sgn", gretl_sgn },
245 { F_CNORM, "cnorm", normal_cdf },
246 { F_DNORM, "dnorm", normal_pdf },
247 { F_QNORM, "qnorm", normal_cdf_inverse },
248 { F_CARG, "carg", carg },
249 { F_CMOD, "cmod", cabs },
250 { F_REAL, "Re", creal },
251 { F_IMAG, "Im", cimag },
252 { F_LOGISTIC, "logistic", logistic_cdf },
253 { 0, NULL, NULL }
254 };
255
256 struct str_table funcs[] = {
257 { F_ATAN2, "atan2" },
258 { F_DIFF, "diff" },
259 { F_LDIFF, "ldiff" },
260 { F_SDIFF, "sdiff" },
261 { F_LLAG, "lags" },
262 { F_HFLAG, "hflags" },
263 { F_DROPCOLL, "dropcoll" },
264 { F_TOINT, "int" },
265 { F_SORT, "sort" },
266 { F_DSORT, "dsort" },
267 { F_SORTBY, "sortby" },
268 { F_RANKING, "ranking" },
269 { F_ODEV, "orthdev" },
270 { F_NOBS, "nobs" },
271 { F_T1, "firstobs" },
272 { F_T2, "lastobs" },
273 { F_RUNIFORM, "uniform" },
274 { F_RNORMAL, "normal" },
275 { F_CUM, "cum" },
276 { F_MISSING, "missing" },
277 { F_DATAOK, "ok" }, /* opposite of missing */
278 { F_MISSZERO, "misszero" },
279 { F_LRVAR, "lrvar" },
280 { F_LRCOVAR, "lrcovar" },
281 { F_FEVD, "fevd" },
282 { F_QUANTILE, "quantile" },
283 { F_MEDIAN, "median" },
284 { F_GINI, "gini" },
285 { F_ZEROMISS, "zeromiss" },
286 { F_SUM, "sum" },
287 { F_SUMALL, "sumall" },
288 { F_MEAN, "mean" },
289 { F_MIN, "min" },
290 { F_MAX, "max" },
291 { F_SD, "sd" },
292 { F_VCE, "var" },
293 { F_SKEWNESS, "skewness" },
294 { F_KURTOSIS, "kurtosis" },
295 { F_SST, "sst" },
296 { F_RESAMPLE, "resample" },
297 { F_PNOBS, "pnobs" }, /* per-unit nobs in panels */
298 { F_PMIN, "pmin" }, /* panel min */
299 { F_PMAX, "pmax" }, /* panel max */
300 { F_PSUM, "psum" }, /* panel sum */
301 { F_PMEAN, "pmean" }, /* panel mean */
302 { F_PXSUM, "pxsum" }, /* panel x-sectional sum */
303 { F_PXNOBS, "pxnobs" }, /* panel x-sectional obs count */
304 { F_PSD, "psd" }, /* panel std dev */
305 { F_PSHRINK, "pshrink" },
306 { F_PEXPAND, "pexpand" },
307 { F_HPFILT, "hpfilt" }, /* Hodrick-Prescott filter */
308 { F_BKFILT, "bkfilt" }, /* Baxter-King filter */
309 { F_BWFILT, "bwfilt" }, /* Butterworth filter */
310 { F_FRACDIFF, "fracdiff" }, /* fractional difference */
311 { F_BOXCOX, "boxcox" }, /* Box-Cox transformation */
312 { F_COV, "cov" },
313 { F_COR, "corr" },
314 { F_MOVAVG, "movavg" },
315 { F_IMAT, "I" },
316 { F_ZEROS, "zeros" },
317 { F_ONES, "ones" },
318 { F_SEQ, "seq" },
319 { F_REPLACE, "replace" },
320 { F_MUNIF, "muniform" },
321 { F_MNORM, "mnormal" },
322 { F_SUMC, "sumc" },
323 { F_SUMR, "sumr" },
324 { F_PRODC, "prodc" },
325 { F_PRODR, "prodr" },
326 { F_MEANC, "meanc" },
327 { F_MEANR, "meanr" },
328 { F_SDC, "sdc" },
329 { F_MINC, "minc" },
330 { F_MAXC, "maxc" },
331 { F_MINR, "minr" },
332 { F_MAXR, "maxr" },
333 { F_IMINC, "iminc" },
334 { F_IMAXC, "imaxc" },
335 { F_IMINR, "iminr" },
336 { F_IMAXR, "imaxr" },
337 { F_FFT, "fft" },
338 { F_FFT2, "fft2" },
339 { F_FFTI, "ffti" },
340 { F_CMULT, "cmult" },
341 { F_HDPROD, "hdprod" },
342 { F_CDIV, "cdiv" },
343 { F_MCOV, "mcov" },
344 { F_MCORR, "mcorr" },
345 { F_MXTAB, "mxtab" },
346 { F_CDEMEAN, "cdemean" },
347 { F_CHOL, "cholesky" },
348 { F_PSDROOT, "psdroot" },
349 { F_INSTRINGS, "instrings" },
350 { F_INV, "inv" },
351 { F_INVPD, "invpd" },
352 { F_GINV, "ginv" },
353 { F_DIAG, "diag" },
354 { F_TRANSP, "transp" },
355 { F_CTRANS, "ctrans" },
356 { F_VEC, "vec" },
357 { F_VECH, "vech" },
358 { F_UNVECH, "unvech" },
359 { F_UPPER, "upper" },
360 { F_LOWER, "lower" },
361 { F_ROWS, "rows" },
362 { F_COLS, "cols" },
363 { F_DET, "det" },
364 { F_LDET, "ldet" },
365 { F_TRACE, "tr" },
366 { F_NORM1, "onenorm" },
367 { F_INFNORM, "infnorm" },
368 { F_RCOND, "rcond" },
369 { F_RANK, "rank" },
370 { F_QFORM, "qform" },
371 { F_MLAG, "mlag" },
372 { F_QR, "qrdecomp" },
373 { F_EIGSYM, "eigensym" },
374 { F_EIGGEN, "eigengen" }, /* legacy */
375 { F_EIGEN, "eigen" },
376 { F_SCHUR, "schur" },
377 { F_EIGSOLVE, "eigsolve" },
378 { F_NULLSPC, "nullspace" },
379 { F_PRINCOMP, "princomp" },
380 { F_MEXP, "mexp" },
381 { F_MLOG, "mlog" },
382 { F_FDJAC, "fdjac" },
383 { F_BFGSMAX, "BFGSmax" },
384 { F_BFGSCMAX, "BFGScmax" },
385 { F_NRMAX, "NRmax" },
386 { F_NUMHESS, "numhess" },
387 { F_OBSNUM, "obsnum" },
388 { F_ISDISCR, "isdiscrete" },
389 { F_ISDUMMY, "isdummy"},
390 { F_TYPEOF, "typeof" },
391 { F_EXISTS, "exists" },
392 { F_NELEM, "nelem" },
393 { F_PDF, "pdf" },
394 { F_CDF, "cdf" },
395 { F_INVCDF, "invcdf" },
396 { F_PVAL, "pvalue" },
397 { F_CRIT, "critical" },
398 { F_RANDGEN, "randgen" },
399 { F_MRANDGEN, "mrandgen" },
400 { F_RANDGEN1, "randgen1" },
401 { F_URCPVAL, "urcpval" },
402 { F_QLRPVAL, "qlrpval" },
403 { F_VALUES, "values" },
404 { F_UNIQ, "uniq" },
405 { F_MSHAPE, "mshape" },
406 { F_SVD, "svd" },
407 { F_MOLS, "mols" },
408 { F_MPOLS, "mpols" },
409 { F_MRLS, "mrls" },
410 { F_MREAD, "mread" },
411 { F_MWRITE, "mwrite" },
412 { F_BREAD, "bread" },
413 { F_BWRITE, "bwrite" },
414 { F_MCSEL, "selifc" },
415 { F_MRSEL, "selifr" },
416 { F_POLROOTS, "polroots" },
417 { F_DUMIFY, "dummify" },
418 { F_WMEAN, "wmean" },
419 { F_WVAR, "wvar" },
420 { F_WSD, "wsd" },
421 { F_SQUARE, "square" },
422 { F_FILTER, "filter" },
423 { F_KFILTER, "kfilter" },
424 { F_KSMOOTH, "ksmooth" },
425 { F_KDSMOOTH, "kdsmooth" },
426 { F_KSIMUL, "ksimul" },
427 { F_KSIMDATA, "ksimdata" },
428 { F_TRIMR, "trimr" },
429 { F_GETENV, "getenv" },
430 { F_NGETENV, "ngetenv" },
431 { F_ARGNAME, "argname" },
432 { F_OBSLABEL, "obslabel" },
433 { F_READFILE, "readfile" },
434 { F_BACKTICK, "grab" },
435 { F_STRSTR, "strstr" },
436 { F_INSTRING, "instring" },
437 { F_STRSTRIP, "strstrip" },
438 { F_STRNCMP, "strncmp" },
439 { F_STRLEN, "strlen" },
440 { F_PRINTF, "printf" },
441 { F_SPRINTF, "sprintf" },
442 { F_SSCANF, "sscanf" },
443 { F_VARNAME, "varname" },
444 { F_VARNAMES, "varnames" },
445 { F_VARNUM, "varnum" },
446 { F_TOLOWER, "tolower" },
447 { F_TOUPPER, "toupper" },
448 { F_CNAMESET, "cnameset" },
449 { F_RNAMESET, "rnameset" },
450 { F_LJUNGBOX, "ljungbox" },
451 { F_MSORTBY, "msortby" },
452 { F_LINCOMB, "lincomb" },
453 { F_IMHOF, "imhof" },
454 { F_TOEPSOLV, "toepsolv" },
455 { F_DSUM, "diagcat" },
456 { F_XMIN, "xmin" },
457 { F_XMAX, "xmax" },
458 { F_CORRGM, "corrgm" },
459 { F_MCOVG, "mcovg" },
460 { F_FCSTATS, "fcstats" },
461 { F_BESSEL, "bessel" },
462 { F_FRACLAG, "fraclag" },
463 { F_MREV, "mreverse" },
464 { F_DESEAS, "deseas" },
465 { F_TRAMOLIN, "linearize" },
466 { F_PERGM, "pergm" },
467 { F_IRR, "irr" },
468 { F_NPV, "npv" },
469 { F_WEEKDAY, "weekday" },
470 { F_KDENSITY, "kdensity" },
471 { F_MONTHLEN, "monthlen" },
472 { F_EPOCHDAY, "epochday" },
473 { F_SETNOTE, "setnote" },
474 { F_POLYFIT, "polyfit" },
475 { F_CHOWLIN, "chowlin" },
476 { F_VARSIMUL, "varsimul" },
477 { F_STRSPLIT, "strsplit" },
478 { F_INLIST, "inlist" },
479 { F_ERRMSG, "errmsg" },
480 { F_ISCONST, "isconst" },
481 { F_IRF, "irf" },
482 { F_INBUNDLE, "inbundle" },
483 { F_STRSUB, "strsub" },
484 { F_REGSUB, "regsub" },
485 { F_CNAMEGET, "cnameget" },
486 { F_RNAMEGET, "rnameget" },
487 { F_RANDINT, "randint" },
488 { F_NADARWAT, "nadarwat" }, /* Nadaraya-Watson */
489 { F_SIMANN, "simann" }, /* simulated annealing */
490 { F_LOESS, "loess" },
491 { F_FREQ, "freq" },
492 { F_GHK, "ghk" },
493 { F_HALTON, "halton" },
494 { F_IWISHART, "iwishart" },
495 { F_ISNAN, "isnan" },
496 { F_TYPESTR, "typestr" },
497 { F_QUADTAB, "quadtable" },
498 { F_AGGRBY, "aggregate" },
499 { F_REMOVE, "remove" },
500 { F_ISODATE, "isodate" },
501 { F_ISOWEEK, "isoweek" },
502 { F_JULDATE, "juldate" },
503 { F_GETLINE, "getline" },
504 { F_ATOF, "atof" },
505 { F_FIXNAME, "fixname" },
506 { F_ISOCONV, "isoconv" },
507 { F_SUBSTR, "substr" },
508 { F_MPI_SEND, "mpisend" },
509 { F_MPI_RECV, "mpirecv" },
510 { F_BCAST, "mpibcast" },
511 { F_REDUCE, "mpireduce" },
512 { F_ALLREDUCE, "mpiallred" },
513 { F_SCATTER, "mpiscatter" },
514 { F_BARRIER, "mpibarrier" },
515 { F_EASTER, "easterday" },
516 { F_GENSERIES, "genseries" },
517 { F_CURL, "curl" },
518 { F_JSONGET, "jsonget" },
519 { F_JSONGETB, "jsongetb" },
520 { F_XMLGET, "xmlget" },
521 { F_NLINES, "nlines" },
522 { F_KPSSCRIT, "kpsscrit" },
523 { F_ARRAY, "array" },
524 { F_STRVALS, "strvals" },
525 { F_STRINGIFY, "stringify" },
526 { F_BOOTCI, "bootci" },
527 { F_BOOTPVAL, "bootpval" },
528 { F_SEASONALS, "seasonals" },
529 { F_DEFARRAY, "defarray" },
530 { F_DEFBUNDLE, "defbundle" },
531 { F_DEFLIST, "deflist" },
532 { F_DEFARGS, "_" },
533 { F_KSETUP, "ksetup" },
534 { F_MWEIGHTS, "mweights" },
535 { F_MGRADIENT, "mgradient" },
536 { F_MLINCOMB, "mlincomb" },
537 { F_MIDASMULT, "midasmult" },
538 { F_HFDIFF, "hfdiff" },
539 { F_HFLDIFF, "hfldiff" },
540 { F_HFLIST, "hflist" },
541 { F_NMMAX, "NMmax" },
542 { F_GSSMAX, "GSSmax" },
543 { F_CNUMBER, "cnumber" },
544 { F_NAALEN, "naalen" },
545 { F_KMEIER, "kmeier" },
546 { F_NORMTEST, "normtest" },
547 { F_ECDF, "ecdf" },
548 { F_NPCORR, "npcorr" },
549 { F_DAYSPAN, "dayspan" },
550 { F_SMPLSPAN, "smplspan" },
551 { F_SLEEP, "sleep" },
552 { F_GETINFO, "getinfo" },
553 { F_CDUMIFY, "cdummify" },
554 { F_SVM, "svm" },
555 { F_GETKEYS, "getkeys" },
556 { F_FEVAL, "feval" },
557 { F_BRENAME, "brename" },
558 { F_CCODE, "isocountry" },
559 { F_LSOLVE, "Lsolve" },
560 { F_HYP2F1, "hyp2f1" },
561 { F_STRFTIME, "strftime" },
562 { F_STRPTIME, "strptime" },
563 { F_BKW, "bkw" },
564 { F_FZERO, "fzero" },
565 { F_CONV2D, "conv2d" },
566 { F_MSPLITBY, "msplitby" },
567 { F_FLATTEN, "flatten" },
568 { F_FUNCERR, "funcerr" },
569 { F_ERRORIF, "errorif" },
570 { F_ISCMPLX, "iscomplex" },
571 { F_COMPLEX, "complex" },
572 { F_CONJ, "conj" },
573 { F_CSWITCH, "cswitch" },
574 { F_RANDPERM, "randperm" },
575 { F_STDIZE, "stdize" },
576 { F_STACK, "stack" },
577 { F_GEOPLOT, "geoplot" },
578 { F_BINCOEFF, "bincoeff" },
579 { F_TDISAGG, "tdisagg" },
580 { F_ASSERT, "assert" },
581 { F_VMA, "vma" },
582 { F_BCHECK, "bcheck" },
583 { F_CONTAINS, "contains" },
584 { F_LPSOLVE, "lpsolve" },
585 { 0, NULL }
586 };
587
588 struct str_table func_alias[] = {
589 { F_EIGEN, "eiggen2" },
590 { F_NMMAX, "NMmin" },
591 { F_NRMAX, "NRmin" },
592 { F_BFGSMAX, "BFGSmin" },
593 { F_BFGSCMAX, "BFGScmin" },
594 { F_GSSMAX, "GSSmin" },
595 { F_GAMMA, "gammafunc" },
596 { F_GAMMA, "gamma" },
597 { F_LOG, "logs" },
598 { F_LOG, "ln" },
599 { F_SQUARE, "xpx" },
600 { F_BACKTICK, "$" },
601 { F_CNAMESET, "colnames" },
602 { F_RNAMESET, "rownames" },
603 { F_CNAMEGET, "colname" },
604 { F_RNAMEGET, "rowname" },
605 { F_EXISTS, "isnull" }, /* deprecated */
606 { 0, NULL }
607 };
608
609 struct str_table hidden_funcs[] = {
610 { HF_CLOGFI, "_clogitfi" },
611 { HF_JBTERMS, "_jbterms" },
612 { HF_LISTINFO, "_listinfo" },
613 { HF_REGLS, "_regls" },
614 { HF_SFCGI, "_sfcgi" },
615 { 0, NULL }
616 };
617
const_lookup(const char * s)618 int const_lookup (const char *s)
619 {
620 int i;
621
622 for (i=0; consts[i].id != 0; i++) {
623 if (!strcmp(s, consts[i].str)) {
624 return consts[i].id;
625 }
626 }
627
628 return 0;
629 }
630
constname(int c)631 const char *constname (int c)
632 {
633 int i;
634
635 for (i=0; consts[i].id != 0; i++) {
636 if (c == consts[i].id) {
637 return consts[i].str;
638 }
639 }
640
641 return "unknown";
642 }
643
gretl_function_hash_init(void)644 static GHashTable *gretl_function_hash_init (void)
645 {
646 GHashTable *ht;
647 int i;
648
649 ht = g_hash_table_new(g_str_hash, g_str_equal);
650
651 for (i=0; ptrfuncs[i].str != NULL; i++) {
652 g_hash_table_insert(ht, (gpointer) ptrfuncs[i].str, &ptrfuncs[i]);
653 }
654
655 for (i=0; funcs[i].str != NULL; i++) {
656 g_hash_table_insert(ht, (gpointer) funcs[i].str, &funcs[i]);
657 }
658
659 for (i=0; hidden_funcs[i].str != NULL; i++) {
660 g_hash_table_insert(ht, (gpointer) hidden_funcs[i].str,
661 &hidden_funcs[i]);
662 }
663
664 return ht;
665 }
666
667 static GHashTable *oht;
668
install_function_override(const char * funname,const char * pkgname,gpointer data)669 int install_function_override (const char *funname,
670 const char *pkgname,
671 gpointer data)
672 {
673 if (funname == NULL) {
674 /* cleanup signal */
675 if (oht != NULL) {
676 g_hash_table_destroy(oht);
677 oht = NULL;
678 }
679 return 0;
680 }
681
682 if (oht == NULL) {
683 oht = g_hash_table_new_full(g_str_hash, g_str_equal,
684 g_free, NULL);
685 }
686
687 if (oht != NULL) {
688 gchar *key = g_strdup_printf("%s::%s", pkgname, funname);
689
690 g_hash_table_insert(oht, (gpointer) key, data);
691 }
692
693 return 0;
694 }
695
delete_function_override(const char * funname,const char * pkgname)696 int delete_function_override (const char *funname,
697 const char *pkgname)
698 {
699 int ret = 0;
700
701 if (oht != NULL) {
702 gchar *key = g_strdup_printf("%s::%s", pkgname, funname);
703
704 if (g_hash_table_remove(oht, key)) {
705 fprintf(stderr, "'%s': deleted override of built-in\n", key);
706 ret = 1;
707 }
708 g_free(key);
709 }
710
711 return ret;
712 }
713
get_function_override(const char * sf,gpointer p)714 static ufunc *get_function_override (const char *sf,
715 gpointer p)
716 {
717 const char *sp = function_package_get_name(p);
718 gchar *key = g_strdup_printf("%s::%s", sp, sf);
719 ufunc *uf = g_hash_table_lookup(oht, key);
720
721 #if 0
722 if (uf != NULL) {
723 fprintf(stderr, "'%s': using package override\n", key);
724 }
725 #endif
726 g_free(key);
727
728 return uf;
729 }
730
731 /* Attention: this function is called from function_loopkup()
732 below, and in that context @p will be NULL -- so don't
733 dereference @p without checking it for nullity first!
734 */
735
real_function_lookup(const char * s,int aliases,parser * p)736 static int real_function_lookup (const char *s, int aliases,
737 parser *p)
738 {
739 static GHashTable *fht;
740 gpointer fnp;
741
742 if (s == NULL) {
743 /* cleanup signal */
744 if (fht != NULL) {
745 g_hash_table_destroy(fht);
746 fht = NULL;
747 }
748 return 0;
749 }
750
751 if (fht == NULL) {
752 fht = gretl_function_hash_init();
753 }
754
755 fnp = g_hash_table_lookup(fht, s);
756 if (fnp != NULL) {
757 struct str_table *st = (struct str_table *) fnp;
758
759 if (p != NULL && st->id > 0 && st->id < FP_MAX) {
760 struct str_table_ex *sx = (struct str_table_ex *) fnp;
761
762 p->data = sx->ptr;
763 }
764 return st->id;
765 }
766
767 if (aliases) {
768 int i;
769
770 for (i=0; func_alias[i].id != 0; i++) {
771 if (!strcmp(s, func_alias[i].str)) {
772 #if 0 /* not just yet? */
773 if (!strcmp(s, "isnull")) {
774 gretl_warnmsg_set(_("obsolete function isnull(): "
775 "please use !exists() instead"));
776 }
777 #endif
778 if (p != NULL) {
779 p->flags |= P_ALIASED;
780 }
781 return func_alias[i].id;
782 }
783 }
784 }
785
786 return 0;
787 }
788
gretl_function_hash_cleanup(void)789 void gretl_function_hash_cleanup (void)
790 {
791 real_function_lookup(NULL, 0, NULL);
792 install_function_override(NULL, NULL, NULL);
793 }
794
function_lookup(const char * s)795 int function_lookup (const char *s)
796 {
797 return real_function_lookup(s, 0, NULL);
798 }
799
is_function_alias(const char * s)800 int is_function_alias (const char *s)
801 {
802 int i;
803
804 for (i=0; func_alias[i].id != 0; i++) {
805 if (!strcmp(s, func_alias[i].str)) {
806 return 1;
807 }
808 }
809
810 return 0;
811 }
812
get_genr_function_pointer(int f)813 void *get_genr_function_pointer (int f)
814 {
815 int i;
816
817 for (i=0; ptrfuncs[i].str != NULL; i++) {
818 if (ptrfuncs[i].id == f) {
819 return ptrfuncs[i].ptr;
820 }
821 }
822
823 return NULL;
824 }
825
function_lookup_with_alias(const char * s,parser * p)826 static int function_lookup_with_alias (const char *s,
827 parser *p)
828 {
829 if (oht != NULL) {
830 /* we have a record of one or more package-private
831 functions whose names collide with built-ins
832 */
833 gpointer pp = get_active_function_package(OPT_O);
834
835 if (pp != NULL) {
836 ufunc *uf = get_function_override(s, pp);
837
838 if (uf != NULL) {
839 p->idstr = gretl_strdup(s);
840 p->data = uf;
841 return UFUN;
842 }
843 }
844 }
845
846 return real_function_lookup(s, 1, p);
847 }
848
funname(int t)849 static const char *funname (int t)
850 {
851 int i;
852
853 for (i=0; ptrfuncs[i].id != 0; i++) {
854 if (t == ptrfuncs[i].id) {
855 return ptrfuncs[i].str;
856 }
857 }
858
859 for (i=0; funcs[i].id != 0; i++) {
860 if (t == funcs[i].id) {
861 return funcs[i].str;
862 }
863 }
864
865 for (i=0; hidden_funcs[i].id != 0; i++) {
866 if (t == hidden_funcs[i].id) {
867 return hidden_funcs[i].str;
868 }
869 }
870
871 return "unknown";
872 }
873
show_alias(int i)874 static int show_alias (int i)
875 {
876 if (strstr(func_alias[i].str, "min")) {
877 return 1;
878 } else {
879 return 0;
880 }
881 }
882
883 /* return the number of built-in functions */
884
gen_func_count(void)885 int gen_func_count (void)
886 {
887 int i, n = 0;
888
889 for (i=0; ptrfuncs[i].id != 0; i++) {
890 n++;
891 }
892
893 for (i=0; funcs[i].id != 0; i++) {
894 n++;
895 }
896
897 for (i=0; func_alias[i].id != 0; i++) {
898 if (show_alias(i)) {
899 n++;
900 }
901 }
902
903 return n;
904 }
905
906 /* return the name of function @i, including aliases */
907
gen_func_name(int i)908 const char *gen_func_name (int i)
909 {
910 int j, seq = -1;
911
912 for (j=0; ptrfuncs[j].id != 0; j++) {
913 seq++;
914 if (seq == i) {
915 return ptrfuncs[i].str;
916 }
917 }
918
919 for (j=0; funcs[j].id != 0; j++) {
920 seq++;
921 if (seq == i) {
922 return funcs[j].str;
923 }
924 }
925
926 for (j=0; func_alias[j].id != 0; j++) {
927 if (show_alias(j)) {
928 seq++;
929 }
930 if (seq == i) {
931 return func_alias[j].str;
932 }
933 }
934
935 return NULL;
936 }
937
model_var_count(void)938 int model_var_count (void)
939 {
940 int i;
941
942 for (i=0; mvars[i].id != 0; i++) ;
943 return i;
944 }
945
model_var_name(int i)946 const char *model_var_name (int i)
947 {
948 return mvars[i].str;
949 }
950
bundle_var_count(void)951 int bundle_var_count (void)
952 {
953 int i;
954
955 for (i=0; bvars[i].id != 0; i++) ;
956 return i;
957 }
958
bundle_var_name(int i)959 const char *bundle_var_name (int i)
960 {
961 return bvars[i].str;
962 }
963
data_var_count(void)964 int data_var_count (void)
965 {
966 int i, n = 0;
967
968 for (i=0; dvars[i].id != 0; i++) {
969 if (dvars[i].str[0] == '$') {
970 n++;
971 }
972 }
973
974 return n;
975 }
976
data_var_name(int i)977 const char *data_var_name (int i)
978 {
979 return dvars[i].str;
980 }
981
gretl_function_complete(const char * s)982 const char *gretl_function_complete (const char *s)
983 {
984 size_t n = strlen(s);
985 int i;
986
987 for (i=0; ptrfuncs[i].str != NULL; i++) {
988 if (!strncmp(s, ptrfuncs[i].str, n)) {
989 return ptrfuncs[i].str;
990 }
991 }
992
993 for (i=0; funcs[i].str != NULL; i++) {
994 if (!strncmp(s, funcs[i].str, n)) {
995 return funcs[i].str;
996 }
997 }
998
999 return NULL;
1000 }
1001
gretl_const_count(void)1002 int gretl_const_count (void)
1003 {
1004 int i;
1005
1006 for (i=0; consts[i].id != 0; i++) ;
1007 return i;
1008 }
1009
gretl_const_name(int i)1010 const char *gretl_const_name (int i)
1011 {
1012 return consts[i].str;
1013 }
1014
1015 /* end external stuff */
1016
1017 /* cases where 'end' can indicate 'last element of' */
1018 #define DUM_END_OK(t) (t==MAT || t==ARRAY || t==MVAR || t==STR || t == LIST)
1019
dummy_lookup(const char * s,parser * p)1020 static int dummy_lookup (const char *s, parser *p)
1021 {
1022 int i, d = 0;
1023
1024 for (i=0; dummies[i].id != 0; i++) {
1025 if (!strcmp(s, dummies[i].str)) {
1026 d = dummies[i].id;
1027 break;
1028 }
1029 }
1030
1031 if (MSEL_DUM(d) && parser_next_char(p) != ']') {
1032 if (d == DUM_END && DUM_END_OK(p->upsym)) {
1033 ; /* OK? */
1034 } else if (d == DUM_END) {
1035 fprintf(stderr, "DUM_END: not interpreted for upsym %s\n",
1036 getsymb(p->upsym));
1037 d = 0;
1038 } else {
1039 d = 0;
1040 }
1041 }
1042
1043 return d;
1044 }
1045
dumname(int t)1046 const char *dumname (int t)
1047 {
1048 int i;
1049
1050 for (i=0; dummies[i].id != 0; i++) {
1051 if (t == dummies[i].id) {
1052 return dummies[i].str;
1053 }
1054 }
1055
1056 return "unknown";
1057 }
1058
dvar_lookup(const char * s)1059 static int dvar_lookup (const char *s)
1060 {
1061 int i;
1062
1063 for (i=0; dvars[i].id != 0; i++) {
1064 if (!strcmp(s, dvars[i].str)) {
1065 return dvars[i].id;
1066 }
1067 }
1068
1069 return 0;
1070 }
1071
dvarname(int t)1072 const char *dvarname (int t)
1073 {
1074 int i;
1075
1076 for (i=0; dvars[i].id != 0; i++) {
1077 if (t == dvars[i].id) {
1078 return dvars[i].str;
1079 }
1080 }
1081
1082 return "unknown";
1083 }
1084
mvar_lookup(const char * s)1085 int mvar_lookup (const char *s)
1086 {
1087 int i;
1088
1089 for (i=0; mvars[i].id != 0; i++) {
1090 if (!strcmp(s, mvars[i].str)) {
1091 return mvars[i].id;
1092 }
1093 }
1094
1095 /* aliases */
1096
1097 if (!strcmp(s, "$nrsq")) {
1098 return M_TRSQ;
1099 } else if (!strcmp(s, "$fcerr")) {
1100 return M_FCSE;
1101 }
1102
1103 return 0;
1104 }
1105
mvarname(int t)1106 const char *mvarname (int t)
1107 {
1108 int i;
1109
1110 for (i=0; mvars[i].id != 0; i++) {
1111 if (t == mvars[i].id) {
1112 return mvars[i].str;
1113 }
1114 }
1115
1116 return "unknown";
1117 }
1118
bvar_lookup(const char * s)1119 int bvar_lookup (const char *s)
1120 {
1121 int i;
1122
1123 for (i=0; bvars[i].id != 0; i++) {
1124 if (!strcmp(s, bvars[i].str)) {
1125 return bvars[i].id;
1126 }
1127 }
1128
1129 return 0;
1130 }
1131
bvarname(int t)1132 const char *bvarname (int t)
1133 {
1134 int i;
1135
1136 for (i=0; bvars[i].id != 0; i++) {
1137 if (t == bvars[i].id) {
1138 return bvars[i].str;
1139 }
1140 }
1141
1142 return "unknown";
1143 }
1144
genr_function_word(const char * s)1145 int genr_function_word (const char *s)
1146 {
1147 int ret = 0;
1148
1149 ret = real_function_lookup(s, 0, NULL);
1150 if (!ret) {
1151 ret = dvar_lookup(s);
1152 }
1153 if (!ret) {
1154 ret = mvar_lookup(s);
1155 }
1156 if (!ret) {
1157 ret = bvar_lookup(s);
1158 }
1159 if (!ret) {
1160 ret = const_lookup(s);
1161 }
1162
1163 return ret;
1164 }
1165
parser_ensure_error_buffer(parser * p)1166 int parser_ensure_error_buffer (parser *p)
1167 {
1168 if (p->errprn == NULL) {
1169 p->errprn = gretl_print_new(GRETL_PRINT_BUFFER, NULL);
1170 if (p->errprn != NULL) {
1171 p->prn = p->errprn;
1172 return 0;
1173 } else {
1174 return E_ALLOC;
1175 }
1176 }
1177
1178 return 0;
1179 }
1180
undefined_symbol_error(const char * s,parser * p)1181 void undefined_symbol_error (const char *s, parser *p)
1182 {
1183 parser_ensure_error_buffer(p);
1184 parser_print_input(p);
1185
1186 if (p->ch == '.') {
1187 pprintf(p->prn, _("%s: no such object"), s);
1188 } else {
1189 pprintf(p->prn, _("The symbol '%s' is undefined"), s);
1190 }
1191 p->err = E_DATA;
1192 }
1193
function_noargs_error(const char * s,parser * p)1194 static void function_noargs_error (const char *s, parser *p)
1195 {
1196 parser_ensure_error_buffer(p);
1197 parser_print_input(p);
1198
1199 pprintf(p->prn, _("'%s': no argument was given"), s);
1200 p->err = E_ARGS;
1201 }
1202
context_error(int c,parser * p,const char * func)1203 void context_error (int c, parser *p, const char *func)
1204 {
1205 #if LDEBUG
1206 if (func != NULL) {
1207 fprintf(stderr, "context error in %s()\n", func);
1208 }
1209 #endif
1210 parser_ensure_error_buffer(p);
1211 if (c != 0) {
1212 parser_print_input(p);
1213 pprintf(p->prn, _("The symbol '%c' is not valid in this context\n"), c);
1214 if (c == '&') {
1215 pputs(p->prn, _("(for logical AND, use '&&')\n"));
1216 } else if (c == '|') {
1217 pputs(p->prn, _("(for logical OR, use '||')\n"));
1218 } else if (c == ',') {
1219 p->err = E_PARSE;
1220 }
1221 } else if (p->sym == EOT) {
1222 parser_print_input(p);
1223 pputs(p->prn, _("Incomplete expression\n"));
1224 } else {
1225 const char *s = getsymb_full(p->sym, p);
1226
1227 if (s != NULL && *s != '\0' && strcmp(s, "unknown")) {
1228 pprintf(p->prn, _("The symbol '%s' is not valid in this context\n"),
1229 getsymb_full(p->sym, p));
1230 } else {
1231 pprintf(p->prn, "The symbol %d is not valid in this context\n",
1232 p->sym);
1233 }
1234 }
1235
1236 if (!p->err) {
1237 p->err = E_PARSE;
1238 }
1239 }
1240
1241 /* @parsing_query: we want to keep track of the case
1242 where we're lexing/parsing the branches of a
1243 ternary "query" expression. When such an expression
1244 is evaluated, it's OK if the branch _not_ taken
1245 contains an undefined symbol; indeed, this can
1246 occur by design, as in
1247
1248 scalar y = isnull(x) ? 0 : x
1249
1250 when "x" is in fact undefined.
1251
1252 We therefore use the "UNDEF" node type to defuse the
1253 error that would otherwise arise on parsing. An error
1254 is triggered only if the branch that references the
1255 UNDEF node is selected (attempting to evaluate an UNDEF
1256 node automatically throws an error.)
1257 */
1258
1259 static int parsing_query;
1260
set_parsing_query(int s)1261 void set_parsing_query (int s)
1262 {
1263 parsing_query = s;
1264 }
1265
get_quoted_string(parser * p,int prevsym)1266 static char *get_quoted_string (parser *p, int prevsym)
1267 {
1268 char *s = NULL;
1269 int n;
1270
1271 #if 0
1272 fprintf(stderr, "get_quoted_string: sym = '%s', prevsym '%s'\n",
1273 getsymb(p->sym), getsymb(prevsym));
1274 #endif
1275
1276 /* Should backslash be taken as literal or as
1277 escape character? Depends on the context,
1278 but in exactly what way?
1279 */
1280
1281 if (prevsym != F_SPRINTF && parsing_query) {
1282 /* this branch revised 2020-02-06 */
1283 n = gretl_charpos('"', p->point);
1284 } else {
1285 /* look for a matching (non-escaped) double-quote */
1286 n = double_quote_position(p->point);
1287 if (n < 0) {
1288 /* backward compatibility */
1289 n = gretl_charpos('"', p->point);
1290 }
1291 }
1292
1293 if (n >= 0) {
1294 s = gretl_strndup(p->point, n);
1295 parser_advance(p, n + 1);
1296 } else {
1297 parser_print_input(p);
1298 pprintf(p->prn, _("Unmatched '%c'\n"), '"');
1299 p->err = E_PARSE;
1300 }
1301
1302 if (!p->err) {
1303 if (p->ch == '.' && *p->point == '$') {
1304 /* maybe quoted name of saved model followed by
1305 dollar variable? */
1306 p->sym = MMEMB;
1307 } else {
1308 p->sym = CSTR;
1309 }
1310 }
1311
1312 if (s != NULL && !strcmp(s, "\\\"")) {
1313 gchar *gs;
1314
1315 free(s);
1316 gs = g_strdup_printf("\"");
1317 s = gretl_strdup(s);
1318 free(gs);
1319 }
1320
1321 return s;
1322 }
1323
might_be_date_string(const char * s,int n)1324 static int might_be_date_string (const char *s, int n)
1325 {
1326 char test[12];
1327 int y, m, d;
1328
1329 #if LDEBUG
1330 fprintf(stderr, "might_be_date_string: s='%s', n=%d\n", s, n);
1331 #endif
1332
1333 if (n > 10) {
1334 return 0;
1335 }
1336
1337 *test = 0;
1338 strncat(test, s, n);
1339
1340 if (strspn(s, "1234567890") == n) {
1341 /* plain integer (FIXME?) */
1342 return 1;
1343 } else if (sscanf(s, "%d:%d", &y, &m) == 2) {
1344 /* quarterly, monthly date */
1345 return 1;
1346 } else if (sscanf(s, "%d-%d-%d", &y, &m, &d) == 3) {
1347 /* daily date? */
1348 return 1;
1349 } else if (sscanf(s, "%d/%d/%d", &y, &m, &d) == 3) {
1350 /* daily date? */
1351 return 1;
1352 }
1353
1354 return 0;
1355 }
1356
obs_node(parser * p)1357 NODE *obs_node (parser *p)
1358 {
1359 NODE *ret = NULL;
1360 char word[OBSLEN + 2] = {0};
1361 const char *s = p->point - 1;
1362 int close;
1363 int special = 0;
1364 int t = -1;
1365
1366 close = gretl_charpos(']', s);
1367
1368 #if LDEBUG
1369 fprintf(stderr, "obs_node: s='%s', ch='%c', close=%d\n",
1370 s, (char) p->ch, close);
1371 #endif
1372
1373 if (close == 0) {
1374 pprintf(p->prn, _("Empty observation []\n"));
1375 p->err = E_PARSE;
1376 } else if (close < 0) {
1377 pprintf(p->prn, _("Unmatched '%c'\n"), '[');
1378 p->err = E_PARSE;
1379 } else if (*s == '"' && close < OBSLEN + 2 &&
1380 gretl_charpos('"', s+1) == close - 2) {
1381 /* quoted observation label? */
1382 strncat(word, s, close);
1383 special = 1;
1384 } else if (might_be_date_string(s, close)) {
1385 strncat(word, s, close);
1386 special = 1;
1387 }
1388
1389 if (special && !p->err) {
1390 t = get_t_from_obs_string(word, p->dset);
1391 if (t >= 0) {
1392 /* convert to user-style 1-based index */
1393 t++;
1394 }
1395 }
1396
1397 if (t > 0) {
1398 parser_advance(p, close - 1);
1399 lex(p);
1400 ret = newdbl(t);
1401 } else if (!p->err) {
1402 #if LDEBUG
1403 fprintf(stderr, "obs_node: first try failed, going for expr\n");
1404 #endif
1405 lex(p);
1406 ret = expr(p);
1407 }
1408
1409 return ret;
1410 }
1411
is_gretl_accessor(const char * s)1412 int is_gretl_accessor (const char *s)
1413 {
1414 int i, n;
1415
1416 for (i=0; dvars[i].id != 0; i++) {
1417 n = strlen(dvars[i].str);
1418 if (!strncmp(s, dvars[i].str, n)) {
1419 return !isalpha(s[n]);
1420 }
1421 }
1422
1423 for (i=0; mvars[i].id != 0; i++) {
1424 n = strlen(mvars[i].str);
1425 if (!strncmp(s, mvars[i].str, n)) {
1426 return !isalpha(s[n]);
1427 }
1428 }
1429
1430 for (i=0; bvars[i].id != 0; i++) {
1431 n = strlen(bvars[i].str);
1432 if (!strncmp(s, bvars[i].str, n)) {
1433 return !isalpha(s[n]);
1434 }
1435 }
1436
1437 return 0;
1438 }
1439
look_up_dollar_word(const char * s,parser * p)1440 static void look_up_dollar_word (const char *s, parser *p)
1441 {
1442 char *bstr;
1443
1444 if ((p->idnum = dvar_lookup(s)) > 0) {
1445 p->sym = DVAR;
1446 } else if ((p->idnum = const_lookup(s)) > 0) {
1447 p->sym = CON;
1448 } else if ((p->idnum = mvar_lookup(s)) > 0) {
1449 p->sym = MVAR;
1450 } else if ((p->idnum = bvar_lookup(s)) > 0) {
1451 p->sym = DBUNDLE;
1452 } else if ((bstr = get_built_in_string_by_name(s+1))) {
1453 p->sym = CSTR;
1454 p->idstr = gretl_strdup(bstr);
1455 } else {
1456 undefined_symbol_error(s, p);
1457 }
1458
1459 #if LDEBUG
1460 fprintf(stderr, "look_up_dollar_word: '%s' -> %d\n",
1461 s, p->idnum);
1462 #endif
1463 }
1464
1465 #ifdef USE_RLIB
1466 # include "gretl_foreign.h"
1467 # include "libset.h"
1468
maybe_get_R_function(const char * s)1469 static int maybe_get_R_function (const char *s)
1470 {
1471 if (libset_get_bool(R_FUNCTIONS) &&
1472 strlen(s) >= 3 && !strncmp(s, "R.", 2)) {
1473 return get_R_function_by_name(s + 2);
1474 } else {
1475 return 0;
1476 }
1477 }
1478
1479 #else /* !USE_RLIB */
1480 # define maybe_get_R_function(s) (0)
1481 #endif
1482
1483 static int doing_genseries;
1484
set_doing_genseries(int s)1485 void set_doing_genseries (int s)
1486 {
1487 doing_genseries = s;
1488 }
1489
1490 /* Get the next non-space byte beyond what's already parsed:
1491 this will either be p->ch, or may be found at p->point
1492 or beyond.
1493 */
1494
parser_next_char(parser * p)1495 static int parser_next_char (parser *p)
1496 {
1497 if (p->ch != ' ') {
1498 return p->ch;
1499 } else {
1500 const char *s = p->point;
1501
1502 while (*s) {
1503 if (!isspace(*s)) {
1504 return *s;
1505 }
1506 s++;
1507 }
1508 return 0;
1509 }
1510 }
1511
char_past_point(parser * p)1512 static int char_past_point (parser *p)
1513 {
1514 if (*p->point != '\0') {
1515 int i;
1516
1517 for (i=1; p->point[i] != '\0'; i++) {
1518 if (!isspace(p->point[i])) {
1519 return p->point[i];
1520 }
1521 }
1522 }
1523 return 0;
1524 }
1525
handle_lpnext(const char * s,parser * p,int have_dset)1526 static void handle_lpnext (const char *s, parser *p,
1527 int have_dset)
1528 {
1529 ufunc *u = get_user_function_by_name(s);
1530 int vnum = -1;
1531
1532 if (have_dset) {
1533 vnum = current_series_index(p->dset, s);
1534 }
1535
1536 if (u == NULL && vnum >= 0) {
1537 /* unambiguous: series */
1538 p->sym = SERIES;
1539 } else if (u != NULL && vnum < 0) {
1540 /* unambiguous: function */
1541 p->sym = UFUN;
1542 } else if (u != NULL) {
1543 /* ambiguous case! */
1544 if (gretl_function_depth() > 0) {
1545 /* function writers should avoid collisions
1546 when naming series
1547 */
1548 p->sym = SERIES;
1549 } else if (p->targ != UNK && p->targ != LIST && p->targ != SERIES) {
1550 /* target not compatible with series lag? */
1551 p->sym = UFUN;
1552 } else if (defining_list(p) || p->targ == SERIES) {
1553 /* debatable */
1554 p->sym = SERIES;
1555 } else {
1556 /* debatable */
1557 p->sym = UFUN;
1558 }
1559 }
1560
1561 if (p->sym != 0) {
1562 p->idstr = gretl_strdup(s);
1563 if (p->sym == UFUN) {
1564 p->data = u;
1565 } else {
1566 p->idnum = vnum;
1567 /* in case of any intervening space */
1568 while (p->ch == ' ') {
1569 parser_getc(p);
1570 }
1571 }
1572 }
1573 }
1574
is_function_word(const char * s)1575 static int is_function_word (const char *s)
1576 {
1577 return function_lookup_with_alias(s, NULL) != 0 ||
1578 get_user_function_by_name(s) != NULL;
1579 }
1580
look_up_word(const char * s,parser * p)1581 static void look_up_word (const char *s, parser *p)
1582 {
1583 int have_dset = (p->dset != NULL && p->dset->v > 0);
1584 int prevsym = p->sym;
1585 int lpnext, err = 0;
1586
1587 #if LDEBUG
1588 fprintf(stderr, "look_up_word: s='%s', ch='%c', next='%c'\n",
1589 s, p->ch, parser_next_char(p));
1590 #endif
1591
1592 /* is the next (or next non-space) character left paren? */
1593 lpnext = parser_next_char(p) == '(';
1594
1595 /* initialize */
1596 p->sym = 0;
1597 p->data = NULL;
1598
1599 if (lpnext) {
1600 /* identifier is immediately followed by left paren:
1601 most likely a function call but could be the name
1602 of a series followed by a lag specifier
1603 */
1604 p->sym = function_lookup_with_alias(s, p);
1605 if (p->sym == 0) {
1606 handle_lpnext(s, p, have_dset);
1607 } else if (p->sym == F_STACK) {
1608 /* special! */
1609 if (strstr(p->point, "--length") || strstr(p->point, "--offset")) {
1610 stack_update_parser_input(p);
1611 }
1612 p->flags |= P_STACK;
1613 }
1614 }
1615
1616 if (p->sym == 0) {
1617 p->idnum = const_lookup(s);
1618 if (p->idnum > 0) {
1619 p->sym = CON;
1620 } else {
1621 p->idnum = dummy_lookup(s, p);
1622 if (p->idnum > 0) {
1623 p->sym = DUM;
1624 } else if (have_dset &&
1625 (p->idnum = current_series_index(p->dset, s)) >= 0) {
1626 p->sym = SERIES;
1627 p->idstr = gretl_strdup(s);
1628 } else if (have_dset && !strcmp(s, "time")) {
1629 p->sym = DUM;
1630 p->idnum = DUM_TREND;
1631 } else if ((p->data = get_user_var_by_name(s)) != NULL) {
1632 user_var *u = p->data;
1633
1634 if (u->type == GRETL_TYPE_DOUBLE) {
1635 p->sym = NUM;
1636 } else if (u->type == GRETL_TYPE_MATRIX) {
1637 p->sym = MAT;
1638 } else if (u->type == GRETL_TYPE_BUNDLE) {
1639 p->sym = BUNDLE;
1640 } else if (u->type == GRETL_TYPE_ARRAY) {
1641 p->sym = ARRAY;
1642 } else if (u->type == GRETL_TYPE_STRING) {
1643 p->sym = STR;
1644 } else if (u->type == GRETL_TYPE_LIST) {
1645 p->sym = LIST;
1646 }
1647 p->idstr = gretl_strdup(s);
1648 } else if (gretl_get_object_by_name(s)) {
1649 p->sym = UOBJ;
1650 p->idstr = gretl_strdup(s);
1651 } else if (defining_list(p) && varname_match_any(p->dset, s)) {
1652 p->sym = WLIST;
1653 p->idstr = gretl_strdup(s);
1654 } else if (have_dset && !strcmp(s, "t")) {
1655 /* if "t" has not been otherwise defined, treat it
1656 as an alias for "obs"
1657 */
1658 p->sym = DVAR;
1659 p->idnum = R_INDEX;
1660 } else if (maybe_get_R_function(s)) {
1661 /* note: all "native" types take precedence over this */
1662 p->sym = RFUN;
1663 p->idstr = gretl_strdup(s + 2);
1664 } else if (parsing_query || prevsym == B_AND) {
1665 p->sym = UNDEF;
1666 p->idstr = gretl_strdup(s);
1667 } else if (p->flags & P_AND) {
1668 p->sym = UNDEF;
1669 p->idstr = gretl_strdup(s);
1670 } else if (!strcmp(s, "pi")) {
1671 /* deprecated */
1672 p->idnum = CONST_PI;
1673 p->sym = CON;
1674 } else {
1675 err = E_UNKVAR;
1676 }
1677 }
1678 }
1679
1680 if (err) {
1681 /* @s could be a function identifier with no
1682 following left paren */
1683 if (is_function_word(s)) {
1684 function_noargs_error(s, p);
1685 } else {
1686 undefined_symbol_error(s, p);
1687 }
1688 }
1689 }
1690
maybe_treat_as_postfix(parser * p)1691 static void maybe_treat_as_postfix (parser *p)
1692 {
1693 if (p->sym == NUM) {
1694 const char *ok = ")]}+-*/%,:";
1695 int c = char_past_point(p);
1696
1697 /* Interpret as foo++ or foo-- ? Only if
1698 the following character is suitable.
1699 */
1700 if (c == 0 || strchr(ok, c)) {
1701 p->sym = p->ch == '+'? NUM_P : NUM_M;
1702 /* swallow the pluses or minuses */
1703 parser_advance(p, 1);
1704 }
1705 }
1706 }
1707
1708 #define dollar_series(t) (t > R_SCALAR_MAX && t < R_SERIES_MAX)
1709
1710 #define could_be_matrix(t) (model_data_matrix(t) || \
1711 model_data_matrix_builder(t) || \
1712 t == M_UHAT || t == M_YHAT || \
1713 (t > R_SERIES_MAX && t < R_MAX))
1714
1715 #define could_be_bundle(t) (t == R_RESULT)
1716
word_check_next_char(parser * p)1717 static void word_check_next_char (parser *p)
1718 {
1719 char chk[2] = {p->ch, '\0'};
1720
1721 #if LDEBUG
1722 if (p->ch) {
1723 fprintf(stderr, "word_check_next_char: ch='%c', sym='%s'\n",
1724 p->ch, getsymb(p->sym));
1725 } else {
1726 fprintf(stderr, "word_check_next_char: ch = NUL\n");
1727 }
1728 #endif
1729 p->upsym = 0;
1730
1731 /* Here we're checking for validity and syntactical implications
1732 of one of "([.+-" immediately following a 'word' of some kind.
1733 */
1734 if (strspn(chk, "([.+-") == 0) {
1735 /* none of the above */
1736 return;
1737 }
1738
1739 if (p->sym == UNDEF) {
1740 /* 2020-03-16: suspend disbelief, we might be in a branch
1741 of "cond ? x : y" that ends up not being taken */
1742 return;
1743 }
1744
1745 if (p->ch == '(') {
1746 /* series or list (lag), or function */
1747 if (p->sym == SERIES) {
1748 if (p->idnum > 0 && p->idnum == p->lh.vnum) {
1749 p->flags |= P_AUTOREG;
1750 }
1751 p->upsym = p->sym;
1752 p->sym = LAG;
1753 } else if (p->sym == LIST) {
1754 p->upsym = p->sym;
1755 p->sym = LAG;
1756 } else if (p->sym == MVAR && model_data_matrix(p->idnum)) {
1757 /* old-style "$coeff(x1)" etc. */
1758 p->sym = DMSTR;
1759 } else if (!func1_symb(p->sym) &&
1760 !func2_symb(p->sym) &&
1761 !func3_symb(p->sym) &&
1762 !funcn_symb(p->sym) &&
1763 p->sym != UFUN &&
1764 p->sym != RFUN) {
1765 p->err = E_PARSE;
1766 }
1767 } else if (p->ch == '[') {
1768 p->upsym = p->sym;
1769 if (p->sym == MAT) {
1770 /* slice of user matrix */
1771 p->sym = MSL;
1772 } else if (*p->point != '"' &&
1773 (p->sym == MVAR || p->sym == DVAR) &&
1774 could_be_matrix(p->idnum)) {
1775 /* slice of $-matrix? */
1776 p->sym = MSL;
1777 } else if (p->sym == SERIES) {
1778 /* observation from series */
1779 p->sym = OBS;
1780 } else if (p->sym == DVAR && dollar_series(p->idnum)) {
1781 /* observation from "dollar" series */
1782 p->sym = OBS;
1783 } else if (p->sym == ARRAY || p->sym == LIST || p->sym == STR) {
1784 /* element or range of array, list or string */
1785 p->sym = OSL;
1786 } else if (p->sym == MVAR && model_data_list(p->idnum)) {
1787 /* element/range of accessor list */
1788 p->sym = OSL;
1789 } else if (p->sym == BUNDLE) {
1790 /* member from bundle */
1791 p->sym = BMEMB;
1792 } else if (p->sym == DBUNDLE) {
1793 /* member from $ bundle */
1794 p->sym = DBMEMB;
1795 } else if (p->sym == DVAR && could_be_bundle(p->idnum)) {
1796 p->sym = DBMEMB;
1797 } else {
1798 p->err = E_PARSE;
1799 }
1800 } else if (p->ch == '.' && *p->point == '$') {
1801 if (p->sym == UOBJ) {
1802 /* name of saved model followed by dollar variable? */
1803 p->sym = MMEMB;
1804 } else if (p->sym == CSTR) {
1805 /* maybe quoted name of saved object followed by
1806 dollar variable? */
1807 p->sym = MMEMB;
1808 } else {
1809 p->err = E_PARSE;
1810 }
1811 } else if (p->ch == '.' && isalpha(*p->point)) {
1812 if (p->sym == LIST) {
1813 p->sym = LISTVAR;
1814 } else if (p->sym == BUNDLE) {
1815 p->sym = BMEMB;
1816 } else if (p->sym == DBUNDLE) {
1817 p->sym = DBMEMB;
1818 } else if (p->sym == DVAR && could_be_bundle(p->idnum)) {
1819 p->sym = DBMEMB;
1820 } else {
1821 p->err = E_PARSE;
1822 }
1823 } else if (p->ch == '+' && *p->point == '+') {
1824 maybe_treat_as_postfix(p);
1825 } else if (p->ch == '-' && *p->point == '-') {
1826 maybe_treat_as_postfix(p);
1827 }
1828
1829 if (p->err) {
1830 context_error(p->ch, p, "word_check_next_char");
1831 }
1832 }
1833
is_word_char(parser * p)1834 static int is_word_char (parser *p)
1835 {
1836 if (strchr(wordchars, p->ch) != NULL) {
1837 return 1;
1838 } else if (defining_list(p) && !doing_genseries &&
1839 (p->ch == '*' || p->ch == '?')) {
1840 return 1;
1841 }
1842
1843 return 0;
1844 }
1845
getword(parser * p,int greek)1846 static void getword (parser *p, int greek)
1847 {
1848 char word[32];
1849 int i = 0;
1850
1851 if (greek) {
1852 /* we have a single (2-byte) UTF-8 greek letter in scope */
1853 for (i=0; i<2; i++) {
1854 word[i] = p->ch;
1855 parser_getc(p);
1856 }
1857 } else {
1858 /* we know the first char is acceptable (and might be '$' or '_') */
1859 word[i++] = p->ch;
1860 parser_getc(p);
1861
1862 #ifdef USE_RLIB
1863 /* allow for R.foo function namespace */
1864 if (*word == 'R' && p->ch == '.' && *p->point != '$') {
1865 if (libset_get_bool(R_FUNCTIONS) && !gretl_is_bundle("R")) {
1866 word[i++] = p->ch;
1867 parser_getc(p);
1868 }
1869 }
1870 #endif
1871 while (p->ch != 0 && is_word_char(p) && i < 31) {
1872 word[i++] = p->ch;
1873 parser_getc(p);
1874 }
1875 }
1876
1877 word[i] = '\0';
1878
1879 #if LDEBUG
1880 fprintf(stderr, "getword: word = '%s'\n", word);
1881 #endif
1882
1883 while (p->ch != 0 && strchr(wordchars, p->ch) != NULL) {
1884 /* flush excess word characters */
1885 parser_getc(p);
1886 }
1887
1888 if (p->flags & P_GETSTR) {
1889 /* uninterpreted string wanted */
1890 p->sym = CSTR;
1891 p->idstr = gretl_strdup(word);
1892 p->flags ^= P_GETSTR;
1893 return; /* FIXME bundle-member name */
1894 } else if ((*word == '$' && word[1]) || !strcmp(word, "obs")) {
1895 look_up_dollar_word(word, p);
1896 } else if (*word == '$' && word[1] == '\0' && (p->ch == '[' || p->ch == '.')) {
1897 p->sym = DBUNDLE;
1898 p->idnum = B_MODEL;
1899 } else {
1900 look_up_word(word, p);
1901 }
1902
1903 if (!p->err) {
1904 word_check_next_char(p);
1905 }
1906
1907 #if LDEBUG
1908 fprintf(stderr, "getword: p->err = %d\n", p->err);
1909 #endif
1910 }
1911
colon_ok(parser * p,char * s,int n)1912 static int colon_ok (parser *p, char *s, int n)
1913 {
1914 int i;
1915
1916 if (p->flags & P_SLICING) {
1917 /* calculating a matrix "slice": colon is a separator in
1918 this context, cannot be part of a time/panel observation
1919 string
1920 */
1921 #if LDEBUG
1922 fprintf(stderr, "colon_ok: doing matrix slice\n");
1923 #endif
1924 return 0;
1925 }
1926
1927 if (n != 1 && n != 3) {
1928 return 0;
1929 }
1930
1931 for (i=0; i<=n; i++) {
1932 if (!isdigit(s[i])) {
1933 return 0;
1934 }
1935 }
1936
1937 return 1;
1938 }
1939
1940 /* Below: we're testing p->ch for validity, given what we've already
1941 packed into string @s up to element @i, and with some regard to
1942 the next character in line.
1943 */
1944
ok_dbl_char(parser * p,char * s,int i)1945 static int ok_dbl_char (parser *p, char *s, int i)
1946 {
1947 int ret = 0;
1948
1949 if (i < 0 || (p->ch >= '0' && p->ch <= '9')) {
1950 return 1;
1951 }
1952
1953 switch (p->ch) {
1954 case '+':
1955 case '-':
1956 ret = s[i] == 'e' || s[i] == 'E';
1957 break;
1958 case '.':
1959 ret = !strchr(s, '.') && !strchr(s, ':') &&
1960 !strchr(s, 'e') && !strchr(s, 'E') &&
1961 *p->point != '.';
1962 break;
1963 case 'e':
1964 case 'E':
1965 ret = !strchr(s, 'e') && !strchr(s, 'E') &&
1966 !strchr(s, ':');
1967 break;
1968 case ':':
1969 /* allow for obs numbers in the form, e.g., "1995:10" */
1970 ret = colon_ok(p, s, i);
1971 break;
1972 default:
1973 break;
1974 }
1975
1976 return ret;
1977 }
1978
parse_number(parser * p)1979 static void parse_number (parser *p)
1980 {
1981 char xstr[NUMLEN] = {0};
1982 int gotcol = 0;
1983 int i = 0;
1984
1985 while (ok_dbl_char(p, xstr, i - 1) && i < NUMLEN - 1) {
1986 xstr[i++] = p->ch;
1987 if (p->ch == ':') {
1988 gotcol = 1;
1989 }
1990 parser_getc(p);
1991 }
1992
1993 while (p->ch >= '0' && p->ch <= '9') {
1994 /* flush excess numeric characters */
1995 parser_getc(p);
1996 }
1997
1998 #if LDEBUG
1999 fprintf(stderr, "parse_number: xstr = '%s'\n", xstr);
2000 #endif
2001
2002 if (gotcol) {
2003 #if LDEBUG
2004 fprintf(stderr, " got colon: obs identifier?\n");
2005 #endif
2006 if (p->dset == NULL || p->dset->n == 0) {
2007 p->err = E_NODATA;
2008 } else if (p->dset->pd == 1) {
2009 p->err = E_PDWRONG;
2010 } else if (dateton(xstr, p->dset) < 0) {
2011 p->err = E_DATA;
2012 } else {
2013 p->idstr = gretl_strdup(xstr);
2014 p->sym = CSTR;
2015 }
2016 } else {
2017 p->xval = dot_atof(xstr);
2018 p->sym = CNUM;
2019 #if LDEBUG
2020 fprintf(stderr, " dot_atof gave %g\n", p->xval);
2021 #endif
2022 }
2023 }
2024
wildcard_special(parser * p)2025 static int wildcard_special (parser *p)
2026 {
2027 char cprev = *(p->point - 2);
2028
2029 if (p->ch == '?') {
2030 char cnext = *p->point;
2031
2032 if ((cprev == ' ' || cprev == ')') && cnext == ' ') {
2033 /* '?' is presumably ternary operator */
2034 return 0;
2035 }
2036 }
2037
2038 if (cprev == ' ' &&
2039 (bare_data_type(p->sym) || closing_sym(p->sym) ||
2040 (p->sym == LAG))) {
2041 p->sym = B_LCAT;
2042 } else {
2043 getword(p, 0);
2044 }
2045
2046 return 1;
2047 }
2048
2049 /* 0xE2 0x88 0x92 = UTF-8 minus */
2050
lex_try_utf8(parser * p)2051 static void lex_try_utf8 (parser *p)
2052 {
2053 if ((unsigned char) *p->point == 0x88 &&
2054 (unsigned char) *(p->point + 1) == 0x92) {
2055 p->sym = B_SUB;
2056 parser_getc(p);
2057 parser_getc(p);
2058 parser_getc(p);
2059 } else {
2060 pprintf(p->prn, _("Unexpected byte 0x%x\n"),
2061 (unsigned char) p->ch);
2062 p->err = E_PARSE;
2063 }
2064 }
2065
2066 #define word_start_special(c) (c == '$' || c == '_')
2067
2068 /* accept 'to', but only with spaces before and after */
2069 #define lag_range_sym(p) ((p->flags & P_LAGPRSE) && p->ch == 't' && \
2070 *p->point == 'o' && \
2071 *(p->point - 2) == ' ' && \
2072 *(p->point + 1) == ' ')
2073
lex(parser * p)2074 void lex (parser *p)
2075 {
2076 static int prevsyms[2];
2077
2078 #if LDEBUG
2079 if (p->ch) {
2080 fprintf(stderr, "lex: p->ch='%c', point='%c'\n", p->ch, *p->point);
2081 } else {
2082 fprintf(stderr, "lex: p->ch is NUL\n");
2083 }
2084 #endif
2085 prevsyms[0] = prevsyms[1];
2086 prevsyms[1] = p->sym;
2087
2088 if (p->ch == 0) {
2089 p->sym = EOT;
2090 return;
2091 }
2092
2093 while (p->ch != 0) {
2094 if ((unsigned char) p->ch == 0xE2) {
2095 lex_try_utf8(p);
2096 return;
2097 } else if (is_greek_letter(p->point - 1)) {
2098 getword(p, 1);
2099 return;
2100 }
2101 switch (p->ch) {
2102 case ' ':
2103 case '\t':
2104 case '\r':
2105 case '\n':
2106 parser_getc(p);
2107 break;
2108 case '+':
2109 p->sym = B_ADD;
2110 parser_getc(p);
2111 return;
2112 case '-':
2113 p->sym = B_SUB;
2114 parser_getc(p);
2115 return;
2116 case '*':
2117 if (defining_list(p) && !doing_genseries) {
2118 /* allow for '*' as wildcard */
2119 wildcard_special(p);
2120 return;
2121 }
2122 parser_getc(p);
2123 if (p->ch == '*') {
2124 p->sym = B_KRON;
2125 parser_getc(p);
2126 } else {
2127 p->sym = B_MUL;
2128 }
2129 return;
2130 case '\'':
2131 p->sym = B_TRMUL;
2132 parser_getc(p);
2133 return;
2134 case '/':
2135 p->sym = B_DIV;
2136 parser_getc(p);
2137 return;
2138 case '\\':
2139 p->sym = B_LDIV;
2140 parser_getc(p);
2141 return;
2142 case '%':
2143 p->sym = B_MOD;
2144 parser_getc(p);
2145 return;
2146 case '^':
2147 p->sym = B_POW;
2148 parser_getc(p);
2149 return;
2150 case '&':
2151 parser_getc(p);
2152 if (p->ch == '&') {
2153 p->sym = B_AND;
2154 parser_getc(p);
2155 } else {
2156 p->sym = U_ADDR;
2157 }
2158 return;
2159 case '|':
2160 parser_getc(p);
2161 if (p->ch == '|') {
2162 p->sym = B_OR;
2163 parser_getc(p);
2164 } else {
2165 p->sym = B_VCAT;
2166 }
2167 return;
2168 case '!':
2169 parser_getc(p);
2170 if (p->ch == '=') {
2171 p->sym = B_NEQ;
2172 parser_getc(p);
2173 } else {
2174 p->sym = U_NOT;
2175 }
2176 return;
2177 case '=':
2178 parser_getc(p);
2179 if (p->ch == '=') {
2180 parser_getc(p);
2181 p->sym = B_EQ;
2182 } else {
2183 gretl_errmsg_set(_("If you meant to test for "
2184 "equality, please use '=='"));
2185 p->err = E_PARSE;
2186 }
2187 return;
2188 case '>':
2189 parser_getc(p);
2190 if (p->ch == '=') {
2191 p->sym = B_GTE;
2192 parser_getc(p);
2193 } else {
2194 p->sym = B_GT;
2195 }
2196 return;
2197 case '<':
2198 parser_getc(p);
2199 if (p->ch == '=') {
2200 p->sym = B_LTE;
2201 parser_getc(p);
2202 } else if (p->ch == '>') {
2203 p->sym = B_NEQ;
2204 parser_getc(p);
2205 } else {
2206 p->sym = B_LT;
2207 }
2208 return;
2209 case '(':
2210 p->sym = G_LPR;
2211 parser_getc(p);
2212 return;
2213 case ')':
2214 p->sym = G_RPR;
2215 parser_getc(p);
2216 return;
2217 case '[':
2218 p->sym = G_LBR;
2219 parser_getc(p);
2220 return;
2221 case '{':
2222 p->sym = G_LCB;
2223 parser_getc(p);
2224 return;
2225 case '}':
2226 p->sym = G_RCB;
2227 parser_getc(p);
2228 return;
2229 case ']':
2230 p->sym = G_RBR;
2231 parser_getc(p);
2232 return;
2233 case '~':
2234 p->sym = B_HCAT;
2235 parser_getc(p);
2236 return;
2237 case ',':
2238 p->sym = P_COM;
2239 parser_getc(p);
2240 return;
2241 case ';':
2242 if (defining_list(p)) {
2243 p->sym = B_JOIN;
2244 } else {
2245 /* used in matrix definition */
2246 p->sym = P_SEMI;
2247 }
2248 parser_getc(p);
2249 return;
2250 case ':':
2251 p->sym = P_COL;
2252 parser_getc(p);
2253 return;
2254 case '?':
2255 if (defining_list(p) && !doing_genseries) {
2256 /* allow for '?' as wildcard */
2257 if (wildcard_special(p)) {
2258 return;
2259 }
2260 }
2261 p->sym = QUERY;
2262 parser_getc(p);
2263 return;
2264 case '.':
2265 if (*p->point == '$') {
2266 p->sym = P_DOT;
2267 parser_getc(p);
2268 return;
2269 } else if (isalpha(*p->point)) {
2270 /* 2017-01-07 */
2271 p->sym = BMEMB;
2272 parser_getc(p);
2273 return;
2274 }
2275 parser_getc(p);
2276 if (p->ch == '*') {
2277 p->sym = B_DOTMULT;
2278 parser_getc(p);
2279 return;
2280 } else if (p->ch == '/') {
2281 p->sym = B_DOTDIV;
2282 parser_getc(p);
2283 return;
2284 } else if (p->ch == '^') {
2285 p->sym = B_DOTPOW;
2286 parser_getc(p);
2287 return;
2288 } else if (p->ch == '+') {
2289 p->sym = B_DOTADD;
2290 parser_getc(p);
2291 return;
2292 } else if (p->ch == '-') {
2293 p->sym = B_DOTSUB;
2294 parser_getc(p);
2295 return;
2296 } else if (p->ch == '=') {
2297 p->sym = B_DOTEQ;
2298 parser_getc(p);
2299 return;
2300 } else if (p->ch == '>') {
2301 p->sym = B_DOTGT;
2302 parser_getc(p);
2303 if (p->ch == '=') {
2304 p->sym = B_DOTGTE;
2305 parser_getc(p);
2306 }
2307 return;
2308 } else if (p->ch == '<') {
2309 p->sym = B_DOTLT;
2310 parser_getc(p);
2311 if (p->ch == '=') {
2312 p->sym = B_DOTLTE;
2313 parser_getc(p);
2314 }
2315 return;
2316 } else if (p->ch == '!' && *p->point == '=') {
2317 p->sym = B_DOTNEQ;
2318 parser_getc(p);
2319 parser_getc(p);
2320 return;
2321 } else if (p->ch == '.') {
2322 p->sym = B_ELLIP;
2323 parser_getc(p);
2324 return;
2325 } else {
2326 /* not a "dot operator", so back up */
2327 parser_ungetc(p);
2328 }
2329 /* Falls through. */
2330 default:
2331 if (defining_list(p) && lag_range_sym(p)) {
2332 p->sym = B_RANGE;
2333 parser_getc(p);
2334 parser_getc(p);
2335 return;
2336 }
2337 if (defining_list(p) && !doing_genseries &&
2338 (bare_data_type(p->sym) || closing_sym(p->sym) ||
2339 p->sym == LAG) && *(p->point - 2) == ' ') {
2340 /* may be forming a list, but only if there are
2341 spaces between the terms
2342 */
2343 p->sym = B_LCAT;
2344 return;
2345 }
2346 if (isdigit(p->ch)) {
2347 parse_number(p);
2348 return;
2349 } else if (p->ch == '.' && isdigit(*p->point)) {
2350 parse_number(p);
2351 return;
2352 } else if (islower(p->ch) || isupper(p->ch) ||
2353 word_start_special(p->ch)) {
2354 getword(p, 0);
2355 return;
2356 } else if (p->ch == '"') {
2357 p->idstr = get_quoted_string(p, prevsyms[0]);
2358 return;
2359 } else {
2360 parser_print_input(p);
2361 if (isprint(p->ch)) {
2362 pprintf(p->prn, _("Invalid character '%c'\n"), p->ch);
2363 } else {
2364 pprintf(p->prn, _("Unexpected byte 0x%x\n"),
2365 (unsigned char) p->ch);
2366 }
2367 p->err = E_PARSE;
2368 return;
2369 }
2370 } /* end ch switch */
2371 } /* end while ch != 0 */
2372 }
2373
getsymb_full(int t,const parser * p)2374 const char *getsymb_full (int t, const parser *p)
2375 {
2376 if ((t > F1_MIN && t < F1_MAX) ||
2377 (t > F1_MAX && t < F2_MAX) ||
2378 (t > F2_MAX && t < FN_MAX)) {
2379 return funname(t);
2380 }
2381
2382 if (t == EOT) {
2383 return "EOT";
2384 }
2385
2386 /* yes, well */
2387 if (t == OBS) {
2388 return "OBS";
2389 } else if (t == MSL) {
2390 return "MSL";
2391 } else if (t == OSL) {
2392 return "OSL";
2393 } else if (t == SUB_ADDR) {
2394 return "SUB_ADDR";
2395 } else if (t == DMSTR) {
2396 return "DMSTR";
2397 } else if (t == SLRAW) {
2398 return "SLRAW";
2399 } else if (t == MSPEC) {
2400 return "MSPEC";
2401 } else if (t == SUBSL) {
2402 return "SUBSL";
2403 } else if (t == MDEF) {
2404 return "MDEF";
2405 } else if (t == FARGS) {
2406 return "FARGS";
2407 } else if (t == LIST || t == WLIST) {
2408 return "LIST";
2409 } else if (t == EMPTY) {
2410 return "EMPTY";
2411 } else if (t == LISTVAR) {
2412 return "LISTVAR";
2413 } else if (t == BMEMB) {
2414 return "BMEMB";
2415 } else if (t == SERIES) {
2416 return "SERIES";
2417 } else if (t == MAT) {
2418 return "MAT";
2419 } else if (t == UNDEF) {
2420 return "UNDEF";
2421 } else if (t == NUM) {
2422 return "NUM";
2423 } else if (t == CNUM) {
2424 return "CNUM";
2425 } else if (t == IVEC) {
2426 return "IVEC";
2427 } else if (t == NUM_P) {
2428 return "NUM_P";
2429 } else if (t == NUM_M) {
2430 return "NUM_M";
2431 } else if (t == DBUNDLE) {
2432 return "DBUNDLE";
2433 } else if (t == DBMEMB) {
2434 return "DBMEMB";
2435 } else if (t == DBMEMB) {
2436 return "MMEMB";
2437 } else if (t == PTR) {
2438 return "PTR";
2439 }
2440
2441 if (p != NULL) {
2442 if (t == BUNDLE) {
2443 return p->idstr;
2444 } else if (t == ARRAY) {
2445 return p->idstr;
2446 } else if (t == UOBJ) {
2447 return p->idstr;
2448 } else if (t == CON) {
2449 return constname(p->idnum);
2450 } else if (t == DUM) {
2451 return dumname(p->idnum);
2452 } else if (t == DVAR) {
2453 return dvarname(p->idnum);
2454 } else if (t == MVAR) {
2455 return mvarname(p->idnum);
2456 } else if (t == UFUN || t == RFUN) {
2457 return p->idstr;
2458 } else if (t == STR || t == CSTR) {
2459 return p->idstr;
2460 }
2461 } else {
2462 if (t == BUNDLE) {
2463 return "BUNDLE";
2464 } else if (t == ARRAY) {
2465 return "ARRAY";
2466 } else if (t == UOBJ) {
2467 return "UOBJ";
2468 } else if (t == CON) {
2469 return "CON";
2470 } else if (t == DUM) {
2471 return "dummy constant";
2472 } else if (t == DVAR) {
2473 return "DVAR";
2474 } else if (t == MVAR) {
2475 return "MVAR";
2476 } else if (t == UFUN) {
2477 return "UFUN";
2478 } else if (t == RFUN) {
2479 return "RFUN";
2480 } else if (t == STR) {
2481 return "STR";
2482 } else if (t == CSTR) {
2483 return "CSTR";
2484 }
2485 }
2486
2487 switch (t) {
2488 case B_ASN:
2489 return "=";
2490 case B_ADD:
2491 case U_POS:
2492 return "+";
2493 case B_SUB:
2494 case U_NEG:
2495 return "-";
2496 case B_MUL:
2497 return "*";
2498 case B_TRMUL:
2499 return "'";
2500 case B_DIV:
2501 return "/";
2502 case B_LDIV:
2503 return "\\";
2504 case B_MOD:
2505 return "%";
2506 case B_POW:
2507 return "^";
2508 case B_EQ:
2509 return "==";
2510 case B_NEQ:
2511 return "!=";
2512 case B_GT:
2513 return ">";
2514 case B_LT:
2515 return "<";
2516 case B_GTE:
2517 return ">=";
2518 case B_LTE:
2519 return "<=";
2520 case B_AND:
2521 return "&&";
2522 case B_JOIN:
2523 return "JOIN";
2524 case B_RANGE:
2525 return " to ";
2526 case B_ELLIP:
2527 return "..";
2528 case U_ADDR:
2529 return "&";
2530 case B_OR:
2531 return "||";
2532 case U_NOT:
2533 return "!";
2534 case G_LPR:
2535 return "(";
2536 case G_RPR:
2537 return ")";
2538 case G_LBR:
2539 return "[";
2540 case G_RBR:
2541 return "]";
2542 case G_LCB:
2543 return "{";
2544 case G_RCB:
2545 return "}";
2546 case B_DOTMULT:
2547 return ".*";
2548 case B_DOTDIV:
2549 return "./";
2550 case B_DOTPOW:
2551 return ".^";
2552 case B_DOTADD:
2553 return ".+";
2554 case B_DOTSUB:
2555 return ".-";
2556 case B_DOTEQ:
2557 return ".=";
2558 case B_DOTGT:
2559 return ".>";
2560 case B_DOTLT:
2561 return ".<";
2562 case B_DOTGTE:
2563 return ".>=";
2564 case B_DOTLTE:
2565 return ".<=";
2566 case B_DOTNEQ:
2567 return ".!=";
2568 case B_DOTASN:
2569 return "dot-assign";
2570 case B_KRON:
2571 return "**";
2572 case B_HCAT:
2573 return "~";
2574 case B_VCAT:
2575 return "|";
2576 case B_LCAT:
2577 return "LCAT";
2578 case P_COM:
2579 return ",";
2580 case P_DOT:
2581 return ".";
2582 case P_SEMI:
2583 return ";";
2584 case P_COL:
2585 return ":";
2586 case QUERY:
2587 return "query";
2588 case LAG:
2589 return "lag";
2590 default:
2591 break;
2592 }
2593
2594 return "unknown";
2595 }
2596
getsymb(int t)2597 const char *getsymb (int t)
2598 {
2599 return getsymb_full(t, NULL);
2600 }
2601