1 #include "lib/mlr_globals.h"
2 #include "lib/mlrutil.h"
3 #include "dsl/function_manager.h"
4 #include "dsl/context_flags.h"
5 #include "dsl/rval_evaluators.h"
6 #include "dsl/rxval_evaluators.h"
7 
8 // ----------------------------------------------------------------
// Category of a built-in function; used to label entries in usage output.
// Values are spelled out to make the numbering explicit.
enum _func_class_t {
	FUNC_CLASS_ARITHMETIC = 0,
	FUNC_CLASS_MATH       = 1,
	FUNC_CLASS_BOOLEAN    = 2,
	FUNC_CLASS_STRING     = 3,
	FUNC_CLASS_CONVERSION = 4,
	FUNC_CLASS_TYPING     = 5,
	FUNC_CLASS_MAPS       = 6,
	FUNC_CLASS_TIME       = 7
};
typedef enum _func_class_t func_class_t;
19 
// Outcome of checking a call's argument count against the built-in table.
enum _arity_check_t {
	ARITY_CHECK_PASS    = 0, // name found and the argument count is acceptable
	ARITY_CHECK_FAIL    = 1, // name found but the argument count is not acceptable
	ARITY_CHECK_NO_SUCH = 2  // no built-in function has that name
};
typedef enum _arity_check_t arity_check_t;
25 
26 typedef struct _function_lookup_t {
27 	func_class_t function_class;
28 	char*        function_name;
29 	int          arity; // for variadic, this is minimum arity
30 	int          variadic;
31 	char*        usage_string;
32 } function_lookup_t;
33 
34 // This is shared between all instances
35 static function_lookup_t FUNCTION_LOOKUP_TABLE[];
36 
37 // ----------------------------------------------------------------
38 // See also comments in rval_evaluators.h
39 
40 //  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
41 static void fmgr_check_arity_with_report(fmgr_t* pfmgr, char* function_name,
42 	int user_provided_arity, int* pvariadic);
43 
44 static rval_evaluator_t* fmgr_alloc_evaluator_from_variadic_func_name(
45 	char* function_name, rval_evaluator_t** pargs, int nargs);
46 
47 static rval_evaluator_t* fmgr_alloc_evaluator_from_zary_func_name(
48 	char* function_name);
49 
50 static rval_evaluator_t* fmgr_alloc_evaluator_from_unary_func_name(
51 	char* function_name, rval_evaluator_t* parg1);
52 
53 static rval_evaluator_t* fmgr_alloc_evaluator_from_binary_func_name(
54 	char* function_name,
55 	rval_evaluator_t* parg1, rval_evaluator_t* parg2);
56 
57 static rval_evaluator_t* fmgr_alloc_evaluator_from_binary_regex_arg2_func_name(
58 	char* function_name,
59 	rval_evaluator_t* parg1, char* regex_string, int ignore_case);
60 
61 static rval_evaluator_t* fmgr_alloc_evaluator_from_ternary_func_name(
62 	char* function_name,
63 	rval_evaluator_t* parg1, rval_evaluator_t* parg2, rval_evaluator_t* parg3);
64 
65 static rval_evaluator_t* fmgr_alloc_evaluator_from_ternary_regex_arg2_func_name(
66 	char* function_name,
67 	rval_evaluator_t* parg1, char* regex_string, int ignore_case, rval_evaluator_t* parg3);
68 
69 //  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
70 // For rval functions, we pass rval_evaluator_t* (CST); for rxval functions, we pass
71 // mlr_dsl_ast_node_t* (AST). It's easy to construct the former from the latter, of
72 // course. The difference is that we look up map-enabled functions by name first,
73 // then non-map-enabled functions by name second.
74 //
75 // * AST nodes are passed to try to look up a map-enabled function given a function name.
76 // * If those exist, they construct CST structures and return.
77 // * But if not, we look up a non-map-enabled function for the same function name.
78 // * If that doesn't exist either, then it's a fatal error. So we go ahead and
79 //   construct an rval_evaluator_t* CST structure from the AST node simply to
80 //   save keystrokes, passing that to the function-lookup routines.
81 //
// It would be simpler to always construct CST structures before looking up
83 // function names, but the only problem is that it's hard to unconstruct CST
84 // structures in case the name lookup fails. (The function-manager
85 // as-yet-unresolved-name list points into them, whenever function arguments
86 // themselves include function calls). Namely, the following scenario is to be
87 // avoided:
88 //
89 // * Construct rxval_evaluator_t* CST structure.
90 // * Look up map-enabled function with a given name.
91 // * That doesn't exist.
92 // * Now the rxval_evaluator_t* can't be torn down since the fmgr points into it.
93 
94 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_variadic_func_name(
95 	char* function_name, sllv_t* parg_nodes,
96 	fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/);
97 
98 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_unary_func_name(
99 	char* function_name,
100 	mlr_dsl_ast_node_t* parg1,
101 	fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/);
102 
103 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_binary_func_name(
104 	char* function_name,
105 	mlr_dsl_ast_node_t* parg1, mlr_dsl_ast_node_t* pargs2,
106 	fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/);
107 
108 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_ternary_func_name(
109 	char* function_name,
110 	mlr_dsl_ast_node_t* parg1, mlr_dsl_ast_node_t* pargs2, mlr_dsl_ast_node_t* pargs3,
111 	fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/);
112 
113 //  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
114 static void  resolve_func_callsite(fmgr_t* pfmgr, rval_evaluator_t*  pev);
115 static void resolve_func_xcallsite(fmgr_t* pfmgr, rxval_evaluator_t* pxev);
116 static rxval_evaluator_t* fmgr_alloc_xeval_wrapping_eval(rval_evaluator_t* pevaluator);
117 static rval_evaluator_t* fmgr_alloc_eval_wrapping_xeval(rxval_evaluator_t* pxevaluator);
118 
119 // ----------------------------------------------------------------
fmgr_alloc()120 fmgr_t* fmgr_alloc() {
121 	fmgr_t* pfmgr = mlr_malloc_or_die(sizeof(fmgr_t));
122 
123 	pfmgr->function_lookup_table = &FUNCTION_LOOKUP_TABLE[0];
124 
125 	pfmgr->built_in_function_names = hss_alloc();
126 	for (int i = 0; ; i++) {
127 		function_lookup_t* plookup = &pfmgr->function_lookup_table[i];
128 		char* fname = plookup->function_name;
129 		if (fname == NULL)
130 			break;
131 		hss_add(pfmgr->built_in_function_names, fname);
132 	}
133 
134 	pfmgr->pudf_names_to_defsite_states = lhmsv_alloc();
135 
136 	pfmgr->pfunc_callsite_evaluators_to_resolve  = sllv_alloc();
137 	pfmgr->pfunc_callsite_xevaluators_to_resolve = sllv_alloc();
138 
139 	return pfmgr;
140 }
141 
142 // ----------------------------------------------------------------
// Releases a function manager and everything it owns. A NULL manager is a
// no-op. For each installed UDF the name is freed, the UDF's own free
// callback is invoked on its state with the given context, and the
// definition-site record itself is freed; then the containers are released.
void fmgr_free(fmgr_t* pfmgr, context_t* pctx) {
	if (pfmgr == NULL)
		return;

	lhmsve_t* pentry = pfmgr->pudf_names_to_defsite_states->phead;
	while (pentry != NULL) {
		udf_defsite_state_t* pstate = pentry->pvvalue;
		free(pstate->name);
		pstate->pfree_func(pstate->pvstate, pctx);
		free(pstate);
		pentry = pentry->pnext;
	}

	lhmsv_free(pfmgr->pudf_names_to_defsite_states);
	sllv_free(pfmgr->pfunc_callsite_evaluators_to_resolve);
	sllv_free(pfmgr->pfunc_callsite_xevaluators_to_resolve);
	hss_free(pfmgr->built_in_function_names);
	free(pfmgr);
}
159 
160 // ----------------------------------------------------------------
// Registers a user-defined function with the manager. The process exits with
// status 1 if the name collides with a built-in function or with a UDF that
// was installed earlier. The map key is a private copy of the name, freed
// along with the map entry.
void fmgr_install_udf(fmgr_t* pfmgr, udf_defsite_state_t* pdefsite_state) {
	char* udf_name = pdefsite_state->name;

	if (hss_has(pfmgr->built_in_function_names, udf_name)) {
		fprintf(stderr, "%s: function named \"%s\" must not override a built-in function of the same name.\n",
			MLR_GLOBALS.bargv0, udf_name);
		exit(1);
	}

	if (lhmsv_get(pfmgr->pudf_names_to_defsite_states, udf_name) != NULL) {
		fprintf(stderr, "%s: function named \"%s\" has already been defined.\n",
			MLR_GLOBALS.bargv0, udf_name);
		exit(1);
	}

	lhmsv_put(pfmgr->pudf_names_to_defsite_states,
		mlr_strdup_or_die(udf_name), pdefsite_state, FREE_ENTRY_KEY);
}
175 
176 // ================================================================
177 static function_lookup_t FUNCTION_LOOKUP_TABLE[] = {
178 
179 	{FUNC_CLASS_ARITHMETIC, "+",  2,0, "Addition."},
180 	{FUNC_CLASS_ARITHMETIC, "+",  1,0, "Unary plus."},
181 	{FUNC_CLASS_ARITHMETIC, "-",  2,0, "Subtraction."},
182 	{FUNC_CLASS_ARITHMETIC, "-",  1,0, "Unary minus."},
183 	{FUNC_CLASS_ARITHMETIC, "*",  2,0, "Multiplication."},
184 	{FUNC_CLASS_ARITHMETIC, "/",  2,0, "Division."},
185 	{FUNC_CLASS_ARITHMETIC, "//", 2,0, "Integer division: rounds to negative (pythonic)."},
186 
187 	{FUNC_CLASS_ARITHMETIC, ".+",  2,0, "Addition, with integer-to-integer overflow"},
188 	{FUNC_CLASS_ARITHMETIC, ".+",  1,0, "Unary plus, with integer-to-integer overflow."},
189 	{FUNC_CLASS_ARITHMETIC, ".-",  2,0, "Subtraction, with integer-to-integer overflow."},
190 	{FUNC_CLASS_ARITHMETIC, ".-",  1,0, "Unary minus, with integer-to-integer overflow."},
191 	{FUNC_CLASS_ARITHMETIC, ".*",  2,0, "Multiplication, with integer-to-integer overflow."},
192 	{FUNC_CLASS_ARITHMETIC, "./",  2,0, "Division, with integer-to-integer overflow."},
193 	{FUNC_CLASS_ARITHMETIC, ".//", 2,0, "Integer division: rounds to negative (pythonic), with integer-to-integer overflow."},
194 
195 	{FUNC_CLASS_ARITHMETIC, "%",  2,0, "Remainder; never negative-valued (pythonic)."},
196 	{FUNC_CLASS_ARITHMETIC, "**", 2,0, "Exponentiation; same as pow, but as an infix\noperator."},
197 	{FUNC_CLASS_ARITHMETIC, "|",  2,0, "Bitwise OR."},
198 	{FUNC_CLASS_ARITHMETIC, "^",  2,0, "Bitwise XOR."},
199 	{FUNC_CLASS_ARITHMETIC, "&",  2,0, "Bitwise AND."},
200 	{FUNC_CLASS_ARITHMETIC, "~",  1,0,
201 		"Bitwise NOT. Beware '$y=~$x' since =~ is the\nregex-match operator: try '$y = ~$x'."},
202 	{FUNC_CLASS_ARITHMETIC, "<<", 2,0, "Bitwise left-shift."},
203 	{FUNC_CLASS_ARITHMETIC, ">>", 2,0, "Bitwise right-shift."},
204 	{FUNC_CLASS_ARITHMETIC, "bitcount",  1,0, "Count of 1-bits"},
205 
206 	{FUNC_CLASS_BOOLEAN, "==",  2,0, "String/numeric equality. Mixing number and string\nresults in string compare."},
207 	{FUNC_CLASS_BOOLEAN, "!=",  2,0, "String/numeric inequality. Mixing number and string\nresults in string compare."},
208 	{FUNC_CLASS_BOOLEAN, "=~",  2,0,
209 		"String (left-hand side) matches regex (right-hand\n"
210 		"side), e.g. '$name =~ \"^a.*b$\"'."},
211 	{FUNC_CLASS_BOOLEAN, "!=~", 2,0,
212 		"String (left-hand side) does not match regex\n"
213 		"(right-hand side), e.g. '$name !=~ \"^a.*b$\"'."},
214 	{FUNC_CLASS_BOOLEAN, ">",   2,0,
215 		"String/numeric greater-than. Mixing number and string\n"
216 		"results in string compare."},
217 	{FUNC_CLASS_BOOLEAN, ">=",  2,0,
218 		"String/numeric greater-than-or-equals. Mixing number\n"
219 		"and string results in string compare."},
220 	{FUNC_CLASS_BOOLEAN, "<",   2,0,
221 		"String/numeric less-than. Mixing number and string\n"
222 		"results in string compare."},
223 	{FUNC_CLASS_BOOLEAN, "<=",  2,0,
224 		"String/numeric less-than-or-equals. Mixing number\n"
225 		"and string results in string compare."},
226 	{FUNC_CLASS_BOOLEAN, "&&",  2,0, "Logical AND."},
227 	{FUNC_CLASS_BOOLEAN, "||",  2,0, "Logical OR."},
228 	{FUNC_CLASS_BOOLEAN, "^^",  2,0, "Logical XOR."},
229 	{FUNC_CLASS_BOOLEAN, "!",   1,0, "Logical negation."},
230 	{FUNC_CLASS_BOOLEAN, "? :", 3,0, "Ternary operator."},
231 
232 	{FUNC_CLASS_STRING, ".",        2,0, "String concatenation."},
233 	{FUNC_CLASS_STRING, "gsub",     3,0, "Example: '$name=gsub($name, \"old\", \"new\")'\n(replace all)."},
234 	{FUNC_CLASS_STRING, "regextract", 2,0, "Example: '$name=regextract($name, \"[A-Z]{3}[0-9]{2}\")'\n."},
235 	{FUNC_CLASS_STRING, "regextract_or_else", 3,0, "Example: '$name=regextract_or_else($name, \"[A-Z]{3}[0-9]{2}\", \"default\")'\n."},
236 	{FUNC_CLASS_STRING, "strlen",   1,0, "String length."},
237 	{FUNC_CLASS_STRING, "sub",      3,0, "Example: '$name=sub($name, \"old\", \"new\")'\n(replace once)."},
238 	{FUNC_CLASS_STRING, "ssub",     3,0, "Like sub but does no regexing. No characters are special."},
239 	{FUNC_CLASS_STRING, "substr",   3,0,
240 		"substr(s,m,n) gives substring of s from 0-up position m to n \n"
241 		"inclusive. Negative indices -len .. -1 alias to 0 .. len-1."},
242 	{FUNC_CLASS_STRING, "tolower",  1,0, "Convert string to lowercase."},
243 	{FUNC_CLASS_STRING, "toupper",  1,0, "Convert string to uppercase."},
244 	{FUNC_CLASS_STRING, "truncate",   2,0, "Truncates string first argument to max length of int second argument."},
245 	{FUNC_CLASS_STRING, "capitalize",  1,0, "Convert string's first character to uppercase."},
246 	{FUNC_CLASS_STRING, "lstrip",  1,0,  "Strip leading whitespace from string."},
247 	{FUNC_CLASS_STRING, "rstrip",  1,0,  "Strip trailing whitespace from string."},
248 	{FUNC_CLASS_STRING, "strip",  1,0,  "Strip leading and trailing whitespace from string."},
249 	{FUNC_CLASS_STRING, "collapse_whitespace",  1,0,  "Strip repeated whitespace from string."},
250 	{FUNC_CLASS_STRING, "clean_whitespace",  1,0,  "Same as collapse_whitespace and strip."},
251 	{FUNC_CLASS_STRING, "system",  1,0, "Run command string, yielding its stdout minus final carriage return."},
252 
253 	{FUNC_CLASS_MATH, "abs",      1,0, "Absolute value."},
254 	{FUNC_CLASS_MATH, "acos",     1,0, "Inverse trigonometric cosine."},
255 	{FUNC_CLASS_MATH, "acosh",    1,0, "Inverse hyperbolic cosine."},
256 	{FUNC_CLASS_MATH, "asin",     1,0, "Inverse trigonometric sine."},
257 	{FUNC_CLASS_MATH, "asinh",    1,0, "Inverse hyperbolic sine."},
258 	{FUNC_CLASS_MATH, "atan",     1,0, "One-argument arctangent."},
259 	{FUNC_CLASS_MATH, "atan2",    2,0, "Two-argument arctangent."},
260 	{FUNC_CLASS_MATH, "atanh",    1,0, "Inverse hyperbolic tangent."},
261 	{FUNC_CLASS_MATH, "cbrt",     1,0, "Cube root."},
262 	{FUNC_CLASS_MATH, "ceil",     1,0, "Ceiling: nearest integer at or above."},
263 	{FUNC_CLASS_MATH, "cos",      1,0, "Trigonometric cosine."},
264 	{FUNC_CLASS_MATH, "cosh",     1,0, "Hyperbolic cosine."},
265 	{FUNC_CLASS_MATH, "erf",      1,0, "Error function."},
266 	{FUNC_CLASS_MATH, "erfc",     1,0, "Complementary error function."},
267 	{FUNC_CLASS_MATH, "exp",      1,0, "Exponential function e**x."},
268 	{FUNC_CLASS_MATH, "expm1",    1,0, "e**x - 1."},
269 	{FUNC_CLASS_MATH, "floor",    1,0, "Floor: nearest integer at or below."},
270 	// See also http://johnkerl.org/doc/randuv.pdf for more about urand() -> other distributions
271 	{FUNC_CLASS_MATH, "invqnorm", 1,0,
272 		"Inverse of normal cumulative distribution\n"
273 		"function. Note that invqorm(urand()) is normally distributed."},
274 	{FUNC_CLASS_MATH, "log",      1,0, "Natural (base-e) logarithm."},
275 	{FUNC_CLASS_MATH, "log10",    1,0, "Base-10 logarithm."},
276 	{FUNC_CLASS_MATH, "log1p",    1,0, "log(1-x)."},
277 	{FUNC_CLASS_MATH, "logifit",  3,0, "Given m and b from logistic regression, compute\nfit: $yhat=logifit($x,$m,$b)."},
278 	{FUNC_CLASS_MATH, "madd",     3,0, "a + b mod m (integers)"},
279 	{FUNC_CLASS_MATH, "max",      0,1, "max of n numbers; null loses"},
280 	{FUNC_CLASS_MATH, "mexp",     3,0, "a ** b mod m (integers)"},
281 	{FUNC_CLASS_MATH, "min",      0,1, "Min of n numbers; null loses"},
282 	{FUNC_CLASS_MATH, "mmul",     3,0, "a * b mod m (integers)"},
283 	{FUNC_CLASS_MATH, "msub",     3,0, "a - b mod m (integers)"},
284 	{FUNC_CLASS_MATH, "pow",      2,0, "Exponentiation; same as **."},
285 	{FUNC_CLASS_MATH, "qnorm",    1,0, "Normal cumulative distribution function."},
286 	{FUNC_CLASS_MATH, "round",    1,0, "Round to nearest integer."},
287 	{FUNC_CLASS_MATH, "roundm",   2,0, "Round to nearest multiple of m: roundm($x,$m) is\nthe same as round($x/$m)*$m"},
288 	{FUNC_CLASS_MATH, "sgn",      1,0, "+1 for positive input, 0 for zero input, -1 for\nnegative input."},
289 	{FUNC_CLASS_MATH, "sin",      1,0, "Trigonometric sine."},
290 	{FUNC_CLASS_MATH, "sinh",     1,0, "Hyperbolic sine."},
291 	{FUNC_CLASS_MATH, "sqrt",     1,0, "Square root."},
292 	{FUNC_CLASS_MATH, "tan",      1,0, "Trigonometric tangent."},
293 	{FUNC_CLASS_MATH, "tanh",     1,0, "Hyperbolic tangent."},
294 	{FUNC_CLASS_MATH, "urand",    0,0,
295 		"Floating-point numbers uniformly distributed on the unit interval.\n"
296 		"Int-valued example: '$n=floor(20+urand()*11)'." },
297 	{FUNC_CLASS_MATH, "urandrange",    2,0,
298 		"Floating-point numbers uniformly distributed on the interval [a, b)." },
299 	{FUNC_CLASS_MATH, "urand32",  0,0, "Integer uniformly distributed 0 and 2**32-1\n"
300 	"inclusive." },
301 	{FUNC_CLASS_MATH, "urandint", 2,0, "Integer uniformly distributed between inclusive\ninteger endpoints." },
302 
303 	{FUNC_CLASS_TIME, "dhms2fsec", 1,0,
304 		"Recovers floating-point seconds as in\n"
305 		"dhms2fsec(\"5d18h53m20.250000s\") = 500000.250000"},
306 	{FUNC_CLASS_TIME, "dhms2sec",  1,0, "Recovers integer seconds as in\ndhms2sec(\"5d18h53m20s\") = 500000"},
307 	{FUNC_CLASS_TIME, "fsec2dhms", 1,0,
308 		"Formats floating-point seconds as in\nfsec2dhms(500000.25) = \"5d18h53m20.250000s\""},
309 	{FUNC_CLASS_TIME, "fsec2hms",  1,0,
310 		"Formats floating-point seconds as in\nfsec2hms(5000.25) = \"01:23:20.250000\""},
311 
312 	{FUNC_CLASS_TIME, "gmt2sec",   1,0, "Parses GMT timestamp as integer seconds since\nthe epoch."},
313 	{FUNC_CLASS_TIME, "localtime2sec", 1,0, "Parses local timestamp as integer seconds since\n"
314 		"the epoch. Consults $TZ environment variable."},
315 
316 	{FUNC_CLASS_TIME, "hms2fsec",  1,0,
317 		"Recovers floating-point seconds as in\nhms2fsec(\"01:23:20.250000\") = 5000.250000"},
318 	{FUNC_CLASS_TIME, "hms2sec",   1,0, "Recovers integer seconds as in\nhms2sec(\"01:23:20\") = 5000"},
319 	{FUNC_CLASS_TIME, "sec2dhms",  1,0, "Formats integer seconds as in sec2dhms(500000)\n= \"5d18h53m20s\""},
320 
321 	{FUNC_CLASS_TIME, "sec2gmt",   1,0,
322 		"Formats seconds since epoch (integer part)\n"
323 		"as GMT timestamp, e.g. sec2gmt(1440768801.7) = \"2015-08-28T13:33:21Z\".\n"
324 		"Leaves non-numbers as-is."},
325 	{FUNC_CLASS_TIME, "sec2gmt",   2,0,
326 		"Formats seconds since epoch as GMT timestamp with n\n"
327 		"decimal places for seconds, e.g. sec2gmt(1440768801.7,1) = \"2015-08-28T13:33:21.7Z\".\n"
328 		"Leaves non-numbers as-is."},
329 	{FUNC_CLASS_TIME, "sec2gmtdate", 1,0,
330 		"Formats seconds since epoch (integer part)\n"
331 		"as GMT timestamp with year-month-date, e.g. sec2gmtdate(1440768801.7) = \"2015-08-28\".\n"
332 		"Leaves non-numbers as-is."},
333 
334 	{FUNC_CLASS_TIME, "sec2localtime",   1,0, "Formats seconds since epoch (integer part)\n"
335 		"as local timestamp, e.g. sec2localtime(1440768801.7) = \"2015-08-28T13:33:21Z\".\n"
336 		"Consults $TZ environment variable. Leaves non-numbers as-is."},
337 	{FUNC_CLASS_TIME, "sec2localtime",   2,0,
338 		"Formats seconds since epoch as local timestamp with n\n"
339 		"decimal places for seconds, e.g. sec2localtime(1440768801.7,1) = \"2015-08-28T13:33:21.7Z\".\n"
340 		"Consults $TZ environment variable. Leaves non-numbers as-is."},
341 	{FUNC_CLASS_TIME, "sec2localdate", 1,0,
342 		"Formats seconds since epoch (integer part)\n"
343 		"as local timestamp with year-month-date, e.g. sec2localdate(1440768801.7) = \"2015-08-28\".\n"
344 		"Consults $TZ environment variable. Leaves non-numbers as-is."},
345 
346 	{FUNC_CLASS_TIME, "sec2hms",   1,0,
347 		"Formats integer seconds as in\n"
348 		"sec2hms(5000) = \"01:23:20\""},
349 	{FUNC_CLASS_TIME, "strftime",  2,0,
350 		"Formats seconds since the epoch as timestamp, e.g.\n"
351 		"strftime(1440768801.7,\"%Y-%m-%dT%H:%M:%SZ\") = \"2015-08-28T13:33:21Z\", and\n"
352 		"strftime(1440768801.7,\"%Y-%m-%dT%H:%M:%3SZ\") = \"2015-08-28T13:33:21.700Z\".\n"
353 		"Format strings are as in the C library (please see \"man strftime\" on your system),\n"
354 		"with the Miller-specific addition of \"%1S\" through \"%9S\" which format the seconds\n"
355 		"with 1 through 9 decimal places, respectively. (\"%S\" uses no decimal places.)\n"
356 		"See also strftime_local."},
357 	{FUNC_CLASS_TIME, "strftime_local",  2,0,
358 		"Like strftime but consults the $TZ environment variable to get local time zone."},
359 	{FUNC_CLASS_TIME, "strptime",  2,0,
360 		"Parses timestamp as floating-point seconds since the epoch,\n"
361 		"e.g. strptime(\"2015-08-28T13:33:21Z\",\"%Y-%m-%dT%H:%M:%SZ\") = 1440768801.000000,\n"
362 		"and  strptime(\"2015-08-28T13:33:21.345Z\",\"%Y-%m-%dT%H:%M:%SZ\") = 1440768801.345000.\n"
363 		"See also strptime_local."},
364 	{FUNC_CLASS_TIME, "strptime_local",  2,0,
365 		"Like strptime, but consults $TZ environment variable to find and use local timezone."},
366 	{FUNC_CLASS_TIME, "systime",   0,0,
367 		"Floating-point seconds since the epoch,\n"
368 		"e.g. 1440768801.748936." },
369 
370 	{FUNC_CLASS_TYPING, "is_absent",      1,0, "False if field is present in input, true otherwise"},
371 	{FUNC_CLASS_TYPING, "is_bool",        1,0, "True if field is present with boolean value. Synonymous with is_boolean."},
372 	{FUNC_CLASS_TYPING, "is_boolean",     1,0, "True if field is present with boolean value. Synonymous with is_bool."},
373 	{FUNC_CLASS_TYPING, "is_empty",       1,0, "True if field is present in input with empty string value, false otherwise."},
374 	{FUNC_CLASS_TYPING, "is_empty_map",    1,0, "True if argument is a map which is empty."},
375 	{FUNC_CLASS_TYPING, "is_float",       1,0, "True if field is present with value inferred to be float"},
376 	{FUNC_CLASS_TYPING, "is_int",         1,0, "True if field is present with value inferred to be int "},
377 	{FUNC_CLASS_TYPING, "is_map",         1,0, "True if argument is a map."},
378 	{FUNC_CLASS_TYPING, "is_nonempty_map", 1,0, "True if argument is a map which is non-empty."},
379 	{FUNC_CLASS_TYPING, "is_not_empty",    1,0, "False if field is present in input with empty value, true otherwise"},
380 	{FUNC_CLASS_TYPING, "is_not_map",      1,0, "True if argument is not a map."},
381 	{FUNC_CLASS_TYPING, "is_not_null",     1,0, "False if argument is null (empty or absent), true otherwise."},
382 	{FUNC_CLASS_TYPING, "is_null",        1,0, "True if argument is null (empty or absent), false otherwise."},
383 	{FUNC_CLASS_TYPING, "is_numeric",     1,0, "True if field is present with value inferred to be int or float"},
384 	{FUNC_CLASS_TYPING, "is_present",     1,0, "True if field is present in input, false otherwise."},
385 	{FUNC_CLASS_TYPING, "is_string",      1,0, "True if field is present with string (including empty-string) value"},
386 
387 	{FUNC_CLASS_TYPING, "asserting_absent",      1,0, "Returns argument if it is absent in the input data, else\n"
388 		"throws an error."},
389 	{FUNC_CLASS_TYPING, "asserting_bool",        1,0, "Returns argument if it is present with boolean value, else\n"
390 		"throws an error."},
391 	{FUNC_CLASS_TYPING, "asserting_boolean",     1,0, "Returns argument if it is present with boolean value, else\n"
392 		"throws an error."},
393 	{FUNC_CLASS_TYPING, "asserting_empty",       1,0, "Returns argument if it is present in input with empty value,\n"
394 		"else throws an error."},
395 	{FUNC_CLASS_TYPING, "asserting_empty_map",    1,0, "Returns argument if it is a map with empty value, else\n"
396 		"throws an error."},
397 	{FUNC_CLASS_TYPING, "asserting_float",       1,0, "Returns argument if it is present with float value, else\n"
398 		"throws an error."},
399 	{FUNC_CLASS_TYPING, "asserting_int",         1,0, "Returns argument if it is present with int value, else\n"
400 		"throws an error."},
401 	{FUNC_CLASS_TYPING, "asserting_map",         1,0, "Returns argument if it is a map, else throws an error."},
402 	{FUNC_CLASS_TYPING, "asserting_nonempty_map", 1,0, "Returns argument if it is a non-empty map, else throws\n"
403 		"an error."},
404 	{FUNC_CLASS_TYPING, "asserting_not_empty",    1,0, "Returns argument if it is present in input with non-empty\n"
405 		"value, else throws an error."},
406 	{FUNC_CLASS_TYPING, "asserting_not_map",      1,0, "Returns argument if it is not a map, else throws an error."},
407 	{FUNC_CLASS_TYPING, "asserting_not_null",     1,0, "Returns argument if it is non-null (non-empty and non-absent),\n"
408 		"else throws an error."},
409 	{FUNC_CLASS_TYPING, "asserting_null",        1,0, "Returns argument if it is null (empty or absent), else throws\n"
410 		"an error."},
411 	{FUNC_CLASS_TYPING, "asserting_numeric",     1,0, "Returns argument if it is present with int or float value,\n"
412 		"else throws an error."},
413 	{FUNC_CLASS_TYPING, "asserting_present",     1,0, "Returns argument if it is present in input, else throws\n"
414 		"an error."},
415 	{FUNC_CLASS_TYPING, "asserting_string",      1,0, "Returns argument if it is present with string (including\n"
416 		"empty-string) value, else throws an error."},
417 
418 	{FUNC_CLASS_CONVERSION, "boolean",     1,0, "Convert int/float/bool/string to boolean."},
419 	{FUNC_CLASS_CONVERSION, "float",       1,0, "Convert int/float/bool/string to float."},
420 	{FUNC_CLASS_CONVERSION, "fmtnum",    2,0,
421 		"Convert int/float/bool to string using\n"
422 		"printf-style format string, e.g. '$s = fmtnum($n, \"%06lld\")'. WARNING: Miller numbers\n"
423 		"are all long long or double. If you use formats like %d or %f, behavior is undefined."},
424 	{FUNC_CLASS_CONVERSION, "hexfmt",    1,0, "Convert int to string, e.g. 255 to \"0xff\"."},
425 	{FUNC_CLASS_CONVERSION, "int",       1,0, "Convert int/float/bool/string to int."},
426 	{FUNC_CLASS_CONVERSION, "string",    1,0, "Convert int/float/bool/string to string."},
427 	{FUNC_CLASS_CONVERSION, "typeof",    1,0,
428 		"Convert argument to type of argument (e.g.\n"
429 		"MT_STRING). For debug."},
430 
431 	{FUNC_CLASS_MAPS, "depth",         1,0, "Prints maximum depth of hashmap: ''. Scalars have depth 0."},
432 	{FUNC_CLASS_MAPS, "haskey",        2,0, "True/false if map has/hasn't key, e.g. 'haskey($*, \"a\")' or\n"
433 		"'haskey(mymap, mykey)'. Error if 1st argument is not a map."},
434 	{FUNC_CLASS_MAPS, "joink",         2,0, "Makes string from map keys. E.g. 'joink($*, \",\")'."},
435 	{FUNC_CLASS_MAPS, "joinkv",        3,0, "Makes string from map key-value pairs. E.g. 'joinkv(@v[2], \"=\", \",\")'"},
436 	{FUNC_CLASS_MAPS, "joinv",         2,0, "Makes string from map values. E.g. 'joinv(mymap, \",\")'."},
437 	{FUNC_CLASS_MAPS, "leafcount",     1,0, "Counts total number of terminal values in hashmap. For single-level maps,\n"
438 		"same as length."},
439 	{FUNC_CLASS_MAPS, "length",        1,0, "Counts number of top-level entries in hashmap. Scalars have length 1."},
440 	{FUNC_CLASS_MAPS, "mapdiff",       0,1, "With 0 args, returns empty map. With 1 arg, returns copy of arg.\n"
441 		"With 2 or more, returns copy of arg 1 with all keys from any of remaining argument maps removed."},
442 	{FUNC_CLASS_MAPS, "mapexcept",     1,1, "Returns a map with keys from remaining arguments, if any, unset.\n"
443 		"E.g. 'mapexcept({1:2,3:4,5:6}, 1, 5, 7)' is '{3:4}'."},
444 	{FUNC_CLASS_MAPS, "mapselect",       1,1, "Returns a map with only keys from remaining arguments set.\n"
445 		"E.g. 'mapselect({1:2,3:4,5:6}, 1, 5, 7)' is '{1:2,5:6}'."},
446 	{FUNC_CLASS_MAPS, "mapsum",        0,1, "With 0 args, returns empty map. With >= 1 arg, returns a map with\n"
447 		"key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'."},
448 	{FUNC_CLASS_MAPS, "splitkv",       3,0, "Splits string by separators into map with type inference.\n"
449 		"E.g. 'splitkv(\"a=1,b=2,c=3\", \"=\", \",\")' gives '{\"a\" : 1, \"b\" : 2, \"c\" : 3}'."},
450 	{FUNC_CLASS_MAPS, "splitkvx",      3,0, "Splits string by separators into map without type inference (keys and\n"
451 		"values are strings). E.g. 'splitkv(\"a=1,b=2,c=3\", \"=\", \",\")' gives\n"
452 			"'{\"a\" : \"1\", \"b\" : \"2\", \"c\" : \"3\"}'."},
453 	{FUNC_CLASS_MAPS, "splitnv",       2,0, "Splits string by separator into integer-indexed map with type inference.\n"
454 		"E.g. 'splitnv(\"a,b,c\" , \",\")' gives '{1 : \"a\", 2 : \"b\", 3 : \"c\"}'."},
455 	{FUNC_CLASS_MAPS, "splitnvx",      2,0, "Splits string by separator into integer-indexed map without type\n"
456 		"inference (values are strings). E.g. 'splitnv(\"4,5,6\" , \",\")' gives '{1 : \"4\", 2 : \"5\", 3 : \"6\"}'."},
457 
458 	{0, NULL, -1 , -1, NULL}, // table terminator
459 };
460 
461 // ----------------------------------------------------------------
check_arity(function_lookup_t lookup_table[],char * function_name,int user_provided_arity,int * parity,int * pvariadic)462 static arity_check_t check_arity(function_lookup_t lookup_table[], char* function_name,
463 	int user_provided_arity, int *parity, int* pvariadic)
464 {
465 	*parity = -1;
466 	*pvariadic = FALSE;
467 	int found_function_name = FALSE;
468 	for (int i = 0; ; i++) {
469 		function_lookup_t* plookup = &lookup_table[i];
470 		if (plookup->function_name == NULL)
471 			break;
472 		if (streq(function_name, plookup->function_name)) {
473 			found_function_name = TRUE;
474 			*parity = plookup->arity;
475 			if (plookup->variadic) {
476 				*pvariadic = TRUE;
477 				if (user_provided_arity < plookup->arity) {
478 					return ARITY_CHECK_FAIL;
479 				}
480 				return ARITY_CHECK_PASS;
481 			}
482 			if (user_provided_arity == plookup->arity) {
483 				return ARITY_CHECK_PASS;
484 			}
485 		}
486 	}
487 	if (found_function_name) {
488 		return ARITY_CHECK_FAIL;
489 	} else {
490 		return ARITY_CHECK_NO_SUCH;
491 	}
492 }
493 
// Verifies that function_name is a built-in accepting user_provided_arity
// arguments. On success it returns normally, with *pvariadic set by
// check_arity(). On an unknown name or an unacceptable argument count it
// prints a diagnostic to stderr and exits the process with status 1.
static void fmgr_check_arity_with_report(fmgr_t* pfmgr, char* function_name,
	int user_provided_arity, int* pvariadic)
{
	int arity = -1;
	arity_check_t result = check_arity(pfmgr->function_lookup_table, function_name, user_provided_arity,
		&arity, pvariadic);

	if (result == ARITY_CHECK_NO_SUCH) {
		fprintf(stderr, "%s: Function name \"%s\" not found.\n", MLR_GLOBALS.bargv0, function_name);
		exit(1);
	}

	if (result == ARITY_CHECK_FAIL) {
		// Some names have both a unary and a binary form ("+", "-", ".+", ".-",
		// sec2gmt, sec2localtime). Detect that from the table itself rather
		// than from a hand-maintained list of names, so the message stays
		// right when rows are added.
		int has_unary = FALSE;
		int has_binary = FALSE;
		for (function_lookup_t* pentry = pfmgr->function_lookup_table;
			pentry->function_name != NULL;
			pentry++)
		{
			if (pentry->variadic || !streq(function_name, pentry->function_name))
				continue;
			if (pentry->arity == 1)
				has_unary = TRUE;
			else if (pentry->arity == 2)
				has_binary = TRUE;
		}

		if (has_unary && has_binary) {
			fprintf(stderr, "%s: Function named \"%s\" takes one argument or two; got %d.\n",
				MLR_GLOBALS.bargv0, function_name, user_provided_arity);
		} else if (*pvariadic) {
			fprintf(stderr, "%s: Function named \"%s\" takes at least %d argument%s; got %d.\n",
				MLR_GLOBALS.bargv0, function_name, arity, (arity == 1) ? "" : "s", user_provided_arity);
		} else {
			fprintf(stderr, "%s: Function named \"%s\" takes %d argument%s; got %d.\n",
				MLR_GLOBALS.bargv0, function_name, arity, (arity == 1) ? "" : "s", user_provided_arity);
		}
		exit(1);
	}
}
521 
// Maps a function class to the lowercase label used in usage output.
// Returns "???" for a value outside the enumeration. The returned string is
// a literal and must not be freed.
static char* function_class_to_desc(func_class_t function_class) {
	char* desc = "???";

	switch (function_class) {
	case FUNC_CLASS_ARITHMETIC:
		desc = "arithmetic";
		break;
	case FUNC_CLASS_MATH:
		desc = "math";
		break;
	case FUNC_CLASS_BOOLEAN:
		desc = "boolean";
		break;
	case FUNC_CLASS_STRING:
		desc = "string";
		break;
	case FUNC_CLASS_CONVERSION:
		desc = "conversion";
		break;
	case FUNC_CLASS_TYPING:
		desc = "typing";
		break;
	case FUNC_CLASS_MAPS:
		desc = "maps";
		break;
	case FUNC_CLASS_TIME:
		desc = "time";
		break;
	default:
		break;
	}

	return desc;
}
535 
// Prints the names of all built-in functions to output_stream, separated by
// single spaces. Each output line begins with leader, and a new line is
// started whenever appending the next name would bring the line to 80 or
// more characters. Names that occupy several table rows are printed once per
// row. The pfmgr argument is not consulted; the shared table is read directly.
void fmgr_list_functions(fmgr_t* pfmgr, FILE* output_stream, char* leader) {
	char* separator = " ";
	int leaderlen = strlen(leader);
	int separatorlen = strlen(separator);
	int linelen = leaderlen;
	int at_line_start = TRUE;

	for (function_lookup_t* pentry = &FUNCTION_LOOKUP_TABLE[0]; pentry->function_name != NULL; pentry++) {
		char* fname = pentry->function_name;
		int itemlen = separatorlen + (int)strlen(fname);

		linelen += itemlen;
		if (linelen >= 80) {
			// Wrap: end the current line and account for the leader on the next.
			fprintf(output_stream, "\n");
			linelen = leaderlen + itemlen;
			at_line_start = TRUE;
		}

		if (at_line_start)
			fprintf(output_stream, "%s", leader);
		fprintf(output_stream, "%s%s", separator, fname);
		at_line_start = FALSE;
	}

	fprintf(output_stream, "\n");
}
563 
564 // Pass function_name == NULL to get usage for all functions.
fmgr_function_usage(fmgr_t * pfmgr,FILE * output_stream,char * function_name)565 void fmgr_function_usage(fmgr_t* pfmgr, FILE* output_stream, char* function_name) {
566 	int found = FALSE;
567 	char* nfmt = "%s (class=%s #args=%d): %s\n";
568 	char* vfmt = "%s (class=%s variadic): %s\n";
569 
570 	int num_printed = 0; // > 1 matches e.g. for - and sec2gmt
571 	for (int i = 0; ; i++) {
572 		function_lookup_t* plookup = &FUNCTION_LOOKUP_TABLE[i];
573 		if (plookup->function_name == NULL) // end of table
574 			break;
575 		if (function_name == NULL || streq(function_name, plookup->function_name)) {
576 			if (++num_printed > 1)
577 				fprintf(output_stream, "\n");
578 			if (plookup->variadic) {
579 				fprintf(output_stream, vfmt, plookup->function_name,
580 					function_class_to_desc(plookup->function_class),
581 					plookup->usage_string);
582 			} else {
583 				fprintf(output_stream, nfmt, plookup->function_name,
584 					function_class_to_desc(plookup->function_class),
585 					plookup->arity, plookup->usage_string);
586 			}
587 			found = TRUE;
588 		}
589 		if (function_name == NULL)
590 			fprintf(output_stream, "\n");
591 	}
592 	if (!found)
593 		fprintf(output_stream, "%s: no such function.\n", function_name);
594 	if (function_name == NULL) {
595 		fprintf(output_stream, "To set the seed for urand, you may specify decimal or hexadecimal 32-bit\n");
596 		fprintf(output_stream, "numbers of the form \"%s --seed 123456789\" or \"%s --seed 0xcafefeed\".\n",
597 			MLR_GLOBALS.bargv0, MLR_GLOBALS.bargv0);
598 		fprintf(output_stream, "Miller's built-in variables are NF, NR, FNR, FILENUM, and FILENAME (awk-like)\n");
599 		fprintf(output_stream, "along with the mathematical constants M_PI and M_E.\n");
600 	}
601 }
602 
// Writes the name of every entry in the built-in function table to
// output_stream, one per line, with no other decoration. The pfmgr argument
// is not consulted.
void fmgr_list_all_functions_raw(fmgr_t* pfmgr, FILE* output_stream) {
	for (int i = 0; ; i++) {
		function_lookup_t* plookup = &FUNCTION_LOOKUP_TABLE[i];
		if (plookup->function_name == NULL) // end of table
			break;
		// Honor the caller-supplied stream rather than unconditionally
		// writing to stdout.
		fprintf(output_stream, "%s\n", plookup->function_name);
	}
}
611 
// Writes a three-column table (name, class, argument count) of every entry in
// the built-in function table to output_stream, preceded by a header row.
// Variadic functions show "variadic" in place of a count. The pfmgr argument
// is not consulted.
void fmgr_list_all_functions_as_table(fmgr_t* pfmgr, FILE* output_stream) {
	fprintf(output_stream, "%-30s %-10s %s\n", "Name", "Class", "#Args");

	// The table is terminated by an entry whose function_name is NULL.
	for (function_lookup_t* pentry = FUNCTION_LOOKUP_TABLE; pentry->function_name != NULL; pentry++) {
		char* class_desc = function_class_to_desc(pentry->function_class);

		fprintf(output_stream, "%-30s %-10s ", pentry->function_name, class_desc);
		if (!pentry->variadic) {
			fprintf(output_stream, "%d", pentry->arity);
		} else {
			fprintf(output_stream, "variadic");
		}
		fprintf(output_stream, "\n");
	}
}
630 
631 // ================================================================
// Per-callsite state for a call to a user-defined function (UDF). The same
// struct backs both the scalar-returning and the map-returning callsite
// evaluators.
typedef struct _udf_callsite_state_t {
	// Number of arguments at the callsite; set from the AST node's child count.
	int arity;
	// One argument evaluator per callsite argument (array of length arity).
	rxval_evaluator_t** pevals;
	// Scratch array of length arity; refilled with the evaluated arguments on
	// every call and then handed to the UDF body.
	boxed_xval_t* args;
	// The UDF definition this callsite invokes. Not freed with the callsite state.
	udf_defsite_state_t* pdefsite_state;
} udf_callsite_state_t;
638 
639 // ----------------------------------------------------------------
udf_callsite_state_alloc(fmgr_t * pfmgr,udf_defsite_state_t * pdefsite_state,mlr_dsl_ast_node_t * pnode,int arity,int type_inferencing,int context_flags)640 static udf_callsite_state_t* udf_callsite_state_alloc(
641 	fmgr_t*              pfmgr,
642 	udf_defsite_state_t* pdefsite_state,
643 	mlr_dsl_ast_node_t*  pnode,
644 	int                  arity,
645 	int                  type_inferencing,
646 	int                  context_flags)
647 {
648 	udf_callsite_state_t* pstate = mlr_malloc_or_die(sizeof(udf_callsite_state_t));
649 
650 	pstate->arity = pnode->pchildren->length;
651 
652 	pstate->pevals = mlr_malloc_or_die(pstate->arity * sizeof(rxval_evaluator_t*));
653 	int i = 0;
654 	for (sllve_t* pe = pnode->pchildren->phead; pe != NULL; pe = pe->pnext, i++) {
655 		mlr_dsl_ast_node_t* parg_node = pe->pvvalue;
656 		pstate->pevals[i] = rxval_evaluator_alloc_from_ast(parg_node,
657 			pfmgr, type_inferencing, context_flags);
658 	}
659 
660 	pstate->args = mlr_malloc_or_die(pstate->arity * sizeof(boxed_xval_t));
661 	for (i = 0; i < pstate->arity; i++) {
662 		// Ownership will be transferred to local-stack which will be responsible for freeing.
663 		pstate->args[i] = box_ephemeral_val(mv_absent());
664 	}
665 
666 	pstate->pdefsite_state = pdefsite_state;
667 
668 	return pstate;
669 }
670 
671 // ----------------------------------------------------------------
// Evaluates every argument expression of the callsite, in order, storing the
// results into the callsite's args array (overwriting prior contents).
static void udf_callsite_state_eval_args(udf_callsite_state_t* pstate, variables_t* pvars) {
	for (int argi = 0; argi < pstate->arity; argi++) {
		rxval_evaluator_t* parg_evaluator = pstate->pevals[argi];
		pstate->args[argi] = parg_evaluator->pprocess_func(parg_evaluator->pvstate, pvars);
	}
}
677 
678 // ----------------------------------------------------------------
// Releases a UDF callsite state: each argument evaluator (via its own free
// function), the evaluator array, the args array, and the state itself. The
// values held in the args slots and the defsite state are not freed here.
static void udf_callsite_state_free(udf_callsite_state_t* pstate) {
	int nargs = pstate->arity;
	for (int argi = 0; argi < nargs; argi++) {
		rxval_evaluator_t* parg_evaluator = pstate->pevals[argi];
		parg_evaluator->pfree_func(parg_evaluator);
	}

	free(pstate->args);
	free(pstate->pevals);
	free(pstate);
}
688 
689 // ----------------------------------------------------------------
rval_evaluator_udf_callsite_process(void * pvstate,variables_t * pvars)690 static mv_t rval_evaluator_udf_callsite_process(void* pvstate, variables_t* pvars) {
691 	udf_callsite_state_t* pstate = pvstate;
692 
693 	udf_callsite_state_eval_args(pstate, pvars);
694 
695 	// Functions returning map values in a scalar context get their return values treated as
696 	// absent-null. (E.g. f() returns a map and g() returns an int and the statement is '$x
697 	// = f() + g()'.) Non-scalar-context return values are handled separately (not here).
698 	boxed_xval_t retval = pstate->pdefsite_state->pprocess_func(
699 		pstate->pdefsite_state->pvstate, pstate->arity, pstate->args, pvars);
700 
701 	if (retval.xval.is_terminal) {
702 		return retval.xval.terminal_mlrval;
703 	} else {
704 		if (retval.is_ephemeral) {
705 			mlhmmv_xvalue_free(&retval.xval);
706 		}
707 		return mv_absent();
708 	}
709 }
710 
rxval_evaluator_udf_xcallsite_process(void * pvstate,variables_t * pvars)711 static boxed_xval_t rxval_evaluator_udf_xcallsite_process(void* pvstate, variables_t* pvars) {
712 	udf_callsite_state_t* pstate = pvstate;
713 	udf_callsite_state_eval_args(pstate, pvars);
714 	return pstate->pdefsite_state->pprocess_func(
715 		pstate->pdefsite_state->pvstate, pstate->arity, pstate->args, pvars);
716 }
717 
// Free function for scalar-return UDF callsite evaluators: releases the
// callsite state, then the evaluator itself.
static void rval_evaluator_udf_callsite_free(rval_evaluator_t* pevaluator) {
	udf_callsite_state_free(pevaluator->pvstate);
	free(pevaluator);
}
723 
// Free function for map-return UDF callsite evaluators: releases the callsite
// state, then the evaluator itself.
static void rxval_evaluator_udf_xcallsite_free(rxval_evaluator_t* pxevaluator) {
	udf_callsite_state_free(pxevaluator->pvstate);
	free(pxevaluator);
}
729 
fmgr_alloc_from_udf_callsite(fmgr_t * pfmgr,udf_defsite_state_t * pdefsite_state,mlr_dsl_ast_node_t * pnode,char * function_name,int arity,int type_inferencing,int context_flags)730 static rval_evaluator_t* fmgr_alloc_from_udf_callsite(fmgr_t* pfmgr, udf_defsite_state_t* pdefsite_state,
731 	mlr_dsl_ast_node_t* pnode, char* function_name, int arity, int type_inferencing, int context_flags)
732 {
733 	rval_evaluator_t* pudf_callsite_evaluator = mlr_malloc_or_die(sizeof(rval_evaluator_t));
734 
735 	udf_callsite_state_t* pstate = udf_callsite_state_alloc(pfmgr, pdefsite_state, pnode,
736 		arity, type_inferencing, context_flags);
737 
738 	pudf_callsite_evaluator->pvstate = pstate;
739 	pudf_callsite_evaluator->pprocess_func = rval_evaluator_udf_callsite_process;
740 	pudf_callsite_evaluator->pfree_func = rval_evaluator_udf_callsite_free;
741 
742 	return pudf_callsite_evaluator;
743 }
744 
fmgr_alloc_from_udf_xcallsite(fmgr_t * pfmgr,udf_defsite_state_t * pdefsite_state,mlr_dsl_ast_node_t * pnode,char * function_name,int arity,int type_inferencing,int context_flags)745 static rxval_evaluator_t* fmgr_alloc_from_udf_xcallsite(fmgr_t* pfmgr, udf_defsite_state_t* pdefsite_state,
746 	mlr_dsl_ast_node_t* pnode, char* function_name, int arity, int type_inferencing, int context_flags)
747 {
748 	rxval_evaluator_t* pudf_xcallsite_evaluator = mlr_malloc_or_die(sizeof(rval_evaluator_t));
749 
750 	udf_callsite_state_t* pstate = udf_callsite_state_alloc(pfmgr, pdefsite_state, pnode,
751 		arity, type_inferencing, context_flags);
752 
753 	pudf_xcallsite_evaluator->pvstate = pstate;
754 	pudf_xcallsite_evaluator->pprocess_func = rxval_evaluator_udf_xcallsite_process;
755 	pudf_xcallsite_evaluator->pfree_func = rxval_evaluator_udf_xcallsite_free;
756 
757 	return pudf_xcallsite_evaluator;
758 }
759 
760 // ================================================================
// Placeholder state for a function/operator callsite that has been parsed but
// not yet bound to a built-in or user-defined function. It records what is
// needed to build the real evaluator at resolution time.
typedef struct _unresolved_func_callsite_state_t {
	// Heap-allocated copy of the called name; owned by this struct.
	char* function_name;
	// Number of arguments supplied at the callsite.
	int arity;
	// Passed through unchanged when the argument evaluators are allocated.
	int type_inferencing;
	int context_flags;
	// AST node for the call. The pointer is stored, not copied, and is not
	// freed with this struct.
	mlr_dsl_ast_node_t* pnode;
} unresolved_func_callsite_state_t;
768 
unresolved_callsite_alloc(char * function_name,int arity,int type_inferencing,int context_flags,mlr_dsl_ast_node_t * pnode)769 static unresolved_func_callsite_state_t* unresolved_callsite_alloc(char* function_name, int arity,
770 	int type_inferencing, int context_flags, mlr_dsl_ast_node_t* pnode)
771 {
772 	unresolved_func_callsite_state_t* pstate = mlr_malloc_or_die(sizeof(unresolved_func_callsite_state_t));
773 	pstate->function_name    = mlr_strdup_or_die(function_name);
774 	pstate->arity            = arity;
775 	pstate->type_inferencing = type_inferencing;
776 	pstate->context_flags    = context_flags;
777 	pstate->pnode            = pnode;
778 	return pstate;
779 }
780 
unresolved_callsite_free(unresolved_func_callsite_state_t * pstate)781 static void unresolved_callsite_free(unresolved_func_callsite_state_t* pstate) {
782 	if (pstate == NULL)
783 		return;
784 	free(pstate->function_name);
785 	free(pstate);
786 }
787 
788 // ----------------------------------------------------------------
provisional_call_func(void * pvstate,variables_t * pvars)789 static mv_t provisional_call_func(void* pvstate, variables_t* pvars) {
790 	unresolved_func_callsite_state_t* pstate = pvstate;
791 	fprintf(stderr,
792 		"%s: internal coding error: unresolved scalar-return-value callsite \"%s\".\n",
793 		MLR_GLOBALS.bargv0, pstate->function_name);
794 	exit(1);
795 }
796 
// Free function for provisional scalar-return callsite evaluators: releases
// the placeholder state, then the evaluator itself.
static void provisional_call_free(rval_evaluator_t* pevaluator) {
	unresolved_callsite_free(pevaluator->pvstate);
	free(pevaluator);
}
802 
fmgr_alloc_provisional_from_operator_or_function_call(fmgr_t * pfmgr,mlr_dsl_ast_node_t * pnode,int type_inferencing,int context_flags)803 rval_evaluator_t* fmgr_alloc_provisional_from_operator_or_function_call(fmgr_t* pfmgr, mlr_dsl_ast_node_t* pnode,
804 	int type_inferencing, int context_flags)
805 {
806 	char* function_name = pnode->text;
807 	int user_provided_arity = pnode->pchildren->length;
808 
809 	unresolved_func_callsite_state_t* pstate = unresolved_callsite_alloc(function_name, user_provided_arity,
810 		type_inferencing, context_flags, pnode);
811 
812 	rval_evaluator_t* pev = mlr_malloc_or_die(sizeof(rval_evaluator_t));
813 	pev->pvstate       = pstate;
814 	pev->pprocess_func = provisional_call_func;
815 	pev->pfree_func    = provisional_call_free;
816 
817 	// Remember this callsite to a function which may or may not have been defined yet.
818 	// Then later we can resolve them to point to UDF bodies which have been defined.
819 	fmgr_mark_callsite_to_resolve(pfmgr, pev);
820 
821 	return pev;
822 }
823 
824 // ----------------------------------------------------------------
provisional_xcall_func(void * pvstate,variables_t * pvars)825 static boxed_xval_t provisional_xcall_func(void* pvstate, variables_t* pvars) {
826 	unresolved_func_callsite_state_t* pstate = pvstate;
827 	fprintf(stderr,
828 		"%s: internal coding error: unresolved map-return-value callsite \"%s\".\n",
829 		MLR_GLOBALS.bargv0, pstate->function_name);
830 	exit(1);
831 }
832 
// Free function for provisional map-return callsite evaluators: releases the
// placeholder state, then the evaluator itself.
static void provisional_xcall_free(rxval_evaluator_t* pxevaluator) {
	unresolved_callsite_free(pxevaluator->pvstate);
	free(pxevaluator);
}
838 
fmgr_xalloc_provisional_from_operator_or_function_call(fmgr_t * pfmgr,mlr_dsl_ast_node_t * pnode,int type_inferencing,int context_flags)839 rxval_evaluator_t* fmgr_xalloc_provisional_from_operator_or_function_call(fmgr_t* pfmgr, mlr_dsl_ast_node_t* pnode,
840 	int type_inferencing, int context_flags)
841 {
842 	char* function_name = pnode->text;
843 	int user_provided_arity = pnode->pchildren->length;
844 
845 	unresolved_func_callsite_state_t* pstate = unresolved_callsite_alloc(function_name, user_provided_arity,
846 		type_inferencing, context_flags, pnode);
847 
848 	rxval_evaluator_t* pxev = mlr_malloc_or_die(sizeof(rxval_evaluator_t));
849 	pxev->pvstate       = pstate;
850 	pxev->pprocess_func = provisional_xcall_func;
851 	pxev->pfree_func    = provisional_xcall_free;
852 
853 	// Remember this callsite to a function which may or may not have been defined yet.
854 	// Then later we can resolve them to point to UDF bodies which have been defined.
855 	fmgr_mark_xcallsite_to_resolve(pfmgr, pxev);
856 
857 	return pxev;
858 }
859 
860 // ----------------------------------------------------------------
// Appends a scalar-return callsite evaluator to the manager's list of
// callsites awaiting resolution.
void fmgr_mark_callsite_to_resolve(fmgr_t* pfmgr, rval_evaluator_t* pev) {
	sllv_append(pfmgr->pfunc_callsite_evaluators_to_resolve, pev);
}
864 
// Appends a map-return callsite evaluator to the manager's list of
// x-callsites awaiting resolution.
void fmgr_mark_xcallsite_to_resolve(fmgr_t* pfmgr, rxval_evaluator_t* pxev) {
	sllv_append(pfmgr->pfunc_callsite_xevaluators_to_resolve, pxev);
}
868 
869 // ----------------------------------------------------------------
870 // Resolving a callsite involves treewalking the AST which may find more callsites to
871 // resolve. E.g. in '$y = f(g($x))', f is initially unresolved (f and/or g perhaps as yet
872 // undefined as of when the callsite is parsed), then at resolution time for f, its
873 // argument 'g($x)' is encountered, initially unresolved, then resolved.
874 // Hence the outer loop.
fmgr_resolve_func_callsites(fmgr_t * pfmgr)875 void fmgr_resolve_func_callsites(fmgr_t* pfmgr) {
876 	while (TRUE) {
877 		int did = FALSE;
878 		while (pfmgr->pfunc_callsite_xevaluators_to_resolve->phead != NULL) {
879 			did = TRUE;
880 			rxval_evaluator_t* pxev = sllv_pop(pfmgr->pfunc_callsite_xevaluators_to_resolve);
881 			unresolved_func_callsite_state_t* ptemp_state = pxev->pvstate;
882 			resolve_func_xcallsite(pfmgr, pxev);
883 			unresolved_callsite_free(ptemp_state);
884 		}
885 
886 		while (pfmgr->pfunc_callsite_evaluators_to_resolve->phead != NULL) {
887 			did = TRUE;
888 			rval_evaluator_t* pev = sllv_pop(pfmgr->pfunc_callsite_evaluators_to_resolve);
889 			unresolved_func_callsite_state_t* ptemp_state = pev->pvstate;
890 			resolve_func_callsite(pfmgr, pev);
891 			unresolved_callsite_free(ptemp_state);
892 		}
893 		if (!did) {
894 			break;
895 		}
896 	}
897 }
898 
899 // ----------------------------------------------------------------
construct_udf_callsite_evaluator(fmgr_t * pfmgr,unresolved_func_callsite_state_t * pcallsite)900 static rval_evaluator_t* construct_udf_callsite_evaluator(
901 	fmgr_t* pfmgr,
902 	unresolved_func_callsite_state_t* pcallsite)
903 {
904 	char* function_name       = pcallsite->function_name;
905 	int   user_provided_arity = pcallsite->arity;
906 	int   type_inferencing    = pcallsite->type_inferencing;
907 	int   context_flags       = pcallsite->context_flags;
908 	mlr_dsl_ast_node_t* pnode = pcallsite->pnode;
909 
910 	udf_defsite_state_t* pudf_defsite_state = lhmsv_get(pfmgr->pudf_names_to_defsite_states,
911 		pcallsite->function_name);
912 
913 	if (pudf_defsite_state != NULL) {
914 		int udf_arity = pudf_defsite_state->arity;
915 		if (user_provided_arity != udf_arity) {
916 			fprintf(stderr, "Function named \"%s\" takes %d argument%s; got %d.\n",
917 				function_name, udf_arity, (udf_arity == 1) ? "" : "s", user_provided_arity);
918 			exit(1);
919 		}
920 
921 		return fmgr_alloc_from_udf_callsite(pfmgr, pudf_defsite_state,
922 			pnode, function_name, user_provided_arity, type_inferencing, context_flags);
923 	} else {
924 		return NULL;
925 	}
926 }
927 
construct_udf_defsite_xevaluator(fmgr_t * pfmgr,unresolved_func_callsite_state_t * pcallsite)928 static rxval_evaluator_t* construct_udf_defsite_xevaluator(
929 	fmgr_t* pfmgr,
930 	unresolved_func_callsite_state_t* pcallsite)
931 {
932 	char* function_name       = pcallsite->function_name;
933 	int   user_provided_arity = pcallsite->arity;
934 	int   type_inferencing    = pcallsite->type_inferencing;
935 	int   context_flags       = pcallsite->context_flags;
936 	mlr_dsl_ast_node_t* pnode = pcallsite->pnode;
937 
938 	udf_defsite_state_t* pudf_defsite_state = lhmsv_get(pfmgr->pudf_names_to_defsite_states,
939 		pcallsite->function_name);
940 
941 	if (pudf_defsite_state != NULL) {
942 		int udf_arity = pudf_defsite_state->arity;
943 		if (user_provided_arity != udf_arity) {
944 			fprintf(stderr, "Function named \"%s\" takes %d argument%s; got %d.\n",
945 				function_name, udf_arity, (udf_arity == 1) ? "" : "s", user_provided_arity);
946 			exit(1);
947 		}
948 
949 		return fmgr_alloc_from_udf_xcallsite(pfmgr, pudf_defsite_state,
950 			pnode, function_name, user_provided_arity, type_inferencing, context_flags);
951 	} else {
952 		return NULL;
953 	}
954 }
955 
956 // ----------------------------------------------------------------
construct_builtin_function_callsite_evaluator(fmgr_t * pfmgr,unresolved_func_callsite_state_t * pcallsite)957 static rval_evaluator_t* construct_builtin_function_callsite_evaluator(
958 	fmgr_t* pfmgr,
959 	unresolved_func_callsite_state_t* pcallsite)
960 {
961 	char* function_name       = pcallsite->function_name;
962 	int   user_provided_arity = pcallsite->arity;
963 	int   type_inferencing    = pcallsite->type_inferencing;
964 	int   context_flags       = pcallsite->context_flags;
965 	mlr_dsl_ast_node_t* pnode = pcallsite->pnode;
966 
967 	int variadic = FALSE;
968 	fmgr_check_arity_with_report(pfmgr, function_name, user_provided_arity, &variadic);
969 
970 	rval_evaluator_t* pevaluator = NULL;
971 	if (variadic) {
972 		int nargs = pnode->pchildren->length;
973 		rval_evaluator_t** pargs = mlr_malloc_or_die(nargs * sizeof(rval_evaluator_t*));
974 		int i = 0;
975 		for (sllve_t* pe = pnode->pchildren->phead; pe != NULL; pe = pe->pnext, i++) {
976 			mlr_dsl_ast_node_t* pchild = pe->pvvalue;
977 			pargs[i] = rval_evaluator_alloc_from_ast(pchild, pfmgr, type_inferencing, context_flags);
978 		}
979 		pevaluator = fmgr_alloc_evaluator_from_variadic_func_name(function_name, pargs, nargs);
980 
981 	} else if (user_provided_arity == 0) {
982 		pevaluator = fmgr_alloc_evaluator_from_zary_func_name(function_name);
983 	} else if (user_provided_arity == 1) {
984 		mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
985 		rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
986 		pevaluator = fmgr_alloc_evaluator_from_unary_func_name(function_name, parg1);
987 	} else if (user_provided_arity == 2) {
988 		mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
989 		mlr_dsl_ast_node_t* parg2_node = pnode->pchildren->phead->pnext->pvvalue;
990 		int type2 = parg2_node->type;
991 
992 		int is_regexy =
993 			streq(function_name, "=~") ||
994 			streq(function_name, "!=~") ||
995 			streq(function_name, "regextract");
996 
997 		if (is_regexy && type2 == MD_AST_NODE_TYPE_STRING_LITERAL) {
998 			rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
999 			pevaluator = fmgr_alloc_evaluator_from_binary_regex_arg2_func_name(function_name,
1000 				parg1, parg2_node->text, FALSE);
1001 		} else if (is_regexy && type2 == MD_AST_NODE_TYPE_REGEXI) {
1002 			rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1003 			pevaluator = fmgr_alloc_evaluator_from_binary_regex_arg2_func_name(function_name, parg1, parg2_node->text,
1004 				TYPE_INFER_STRING_FLOAT_INT);
1005 		} else {
1006 			// regexes can still be applied here, e.g. if the 2nd argument is a non-terminal AST: however
1007 			// the regexes will be compiled record-by-record rather than once at alloc time, which will
1008 			// be slower.
1009 			rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1010 			rval_evaluator_t* parg2 = rval_evaluator_alloc_from_ast(parg2_node, pfmgr, type_inferencing, context_flags);
1011 			pevaluator = fmgr_alloc_evaluator_from_binary_func_name(function_name, parg1, parg2);
1012 		}
1013 
1014 	} else if (user_provided_arity == 3) {
1015 		mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
1016 		mlr_dsl_ast_node_t* parg2_node = pnode->pchildren->phead->pnext->pvvalue;
1017 		mlr_dsl_ast_node_t* parg3_node = pnode->pchildren->phead->pnext->pnext->pvvalue;
1018 		int type2 = parg2_node->type;
1019 
1020 		int is_regexy =
1021 			streq(function_name, "sub") ||
1022 			streq(function_name, "gsub") ||
1023 			streq(function_name, "regextract_or_else");
1024 
1025 		if (is_regexy && type2 == MD_AST_NODE_TYPE_STRING_LITERAL) {
1026 			// sub/gsub-regex special case:
1027 			rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1028 			rval_evaluator_t* parg3 = rval_evaluator_alloc_from_ast(parg3_node, pfmgr, type_inferencing, context_flags);
1029 			pevaluator = fmgr_alloc_evaluator_from_ternary_regex_arg2_func_name(function_name, parg1, parg2_node->text,
1030 				FALSE, parg3);
1031 
1032 		} else if (is_regexy && type2 == MD_AST_NODE_TYPE_REGEXI) {
1033 			// sub/gsub-regex special case:
1034 			rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1035 			rval_evaluator_t* parg3 = rval_evaluator_alloc_from_ast(parg3_node, pfmgr, type_inferencing, context_flags);
1036 			pevaluator = fmgr_alloc_evaluator_from_ternary_regex_arg2_func_name(function_name, parg1, parg2_node->text,
1037 				TYPE_INFER_STRING_FLOAT_INT, parg3);
1038 
1039 		} else {
1040 			// regexes can still be applied here, e.g. if the 2nd argument is a non-terminal AST: however
1041 			// the regexes will be compiled record-by-record rather than once at alloc time, which will
1042 			// be slower.
1043 			rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1044 			rval_evaluator_t* parg2 = rval_evaluator_alloc_from_ast(parg2_node, pfmgr, type_inferencing, context_flags);
1045 			rval_evaluator_t* parg3 = rval_evaluator_alloc_from_ast(parg3_node, pfmgr, type_inferencing, context_flags);
1046 			pevaluator = fmgr_alloc_evaluator_from_ternary_func_name(function_name, parg1, parg2, parg3);
1047 		}
1048 
1049 	} else {
1050 		fprintf(stderr, "Miller: internal coding error:  arity for function name \"%s\" misdetected.\n",
1051 			function_name);
1052 		exit(1);
1053 	}
1054 
1055 	return pevaluator;
1056 }
1057 
1058 // ----------------------------------------------------------------
// At callsites, arguments can be scalars or maps; return values can be scalars
// or maps.  At the user level, a function may take map input and produce
// scalar output or vice versa. As of this writing, though, *internally*
// functions go from scalars to scalar or from maps to map. This wrapper wraps
// scalar input for functions which know about maps.

// State for an rxval evaluator which delegates to a scalar (rval) evaluator.
typedef struct _xeval_wrapping_eval_state_t {
	// The wrapped scalar evaluator; freed together with the wrapper.
	rval_evaluator_t* pevaluator;
} xeval_wrapping_eval_state_t;
1068 
xeval_wrapping_eval_func(void * pvstate,variables_t * pvars)1069 static boxed_xval_t xeval_wrapping_eval_func(void* pvstate, variables_t* pvars) {
1070 	xeval_wrapping_eval_state_t* pstate = pvstate;
1071 	rval_evaluator_t* pevaluator = pstate->pevaluator;
1072 	mv_t val = pevaluator->pprocess_func(pevaluator->pvstate, pvars);
1073 	return (boxed_xval_t) {
1074 		.xval = mlhmmv_xvalue_wrap_terminal(val),
1075 		.is_ephemeral = TRUE, // xxx verify reference semantics for RHS evaluators!
1076 	};
1077 }
1078 
// Free function for the scalar-to-rxval wrapper: releases the wrapped scalar
// evaluator through its own free function, then the wrapper state, then the
// wrapper evaluator itself.
static void xeval_wrapping_eval_free(rxval_evaluator_t* pxevaluator) {
	xeval_wrapping_eval_state_t* pwrapper = pxevaluator->pvstate;
	rval_evaluator_t* pinner = pwrapper->pevaluator;

	pinner->pfree_func(pinner);
	free(pwrapper);
	free(pxevaluator);
}
1085 
fmgr_alloc_xeval_wrapping_eval(rval_evaluator_t * pevaluator)1086 static rxval_evaluator_t* fmgr_alloc_xeval_wrapping_eval(rval_evaluator_t* pevaluator) {
1087 	rxval_evaluator_t* pxevaluator = mlr_malloc_or_die(sizeof(rxval_evaluator_t));
1088 
1089 	xeval_wrapping_eval_state_t* pstate = mlr_malloc_or_die(sizeof(xeval_wrapping_eval_state_t));
1090 	pstate->pevaluator = pevaluator;
1091 
1092 	pxevaluator->pvstate       = pstate;
1093 	pxevaluator->pprocess_func = xeval_wrapping_eval_func;
1094 	pxevaluator->pfree_func    = xeval_wrapping_eval_free;
1095 
1096 	return pxevaluator;
1097 }
1098 
1099 // ----------------------------------------------------------------
// At callsites, arguments can be scalars or maps; return values can be scalars
// or maps.  At the user level, a function may take map input and produce
// scalar output or vice versa. As of this writing, though, *internally*
// functions go from scalars to scalar or from maps to map. This wrapper wraps
// maybe-map input for functions which do not know about maps.

// State for a scalar (rval) evaluator which delegates to an rxval evaluator.
typedef struct _eval_wrapping_xeval_state_t {
	// The wrapped rxval evaluator; freed together with the wrapper.
	rxval_evaluator_t* pxevaluator;
} eval_wrapping_xeval_state_t;
1109 
eval_wrapping_xeval_func(void * pvstate,variables_t * pvars)1110 static mv_t eval_wrapping_xeval_func(void* pvstate, variables_t* pvars) {
1111 	eval_wrapping_xeval_state_t* pstate = pvstate;
1112 	rxval_evaluator_t* pxevaluator = pstate->pxevaluator;
1113 	boxed_xval_t bxval = pxevaluator->pprocess_func(pxevaluator->pvstate, pvars);
1114 
1115 	if (bxval.xval.is_terminal) {
1116 		if (bxval.is_ephemeral) {
1117 			return bxval.xval.terminal_mlrval;
1118 		} else {
1119 			return mv_copy(&bxval.xval.terminal_mlrval);
1120 		}
1121 
1122 	} else {
1123 		if (bxval.is_ephemeral) {
1124 			mlhmmv_xvalue_free(&bxval.xval);
1125 		}
1126 		return mv_error();
1127 	}
1128 
1129 }
1130 
// Free function for the rxval-to-scalar wrapper: releases the wrapped rxval
// evaluator through its own free function, then the wrapper state, then the
// wrapper evaluator itself.
static void eval_wrapping_xeval_free(rval_evaluator_t* pevaluator) {
	eval_wrapping_xeval_state_t* pwrapper = pevaluator->pvstate;
	rxval_evaluator_t* pinner = pwrapper->pxevaluator;

	pinner->pfree_func(pinner);
	free(pwrapper);
	free(pevaluator);
}
1137 
fmgr_alloc_eval_wrapping_xeval(rxval_evaluator_t * pxevaluator)1138 static rval_evaluator_t* fmgr_alloc_eval_wrapping_xeval(rxval_evaluator_t* pxevaluator) {
1139 	rval_evaluator_t* pevaluator = mlr_malloc_or_die(sizeof(rval_evaluator_t));
1140 
1141 	eval_wrapping_xeval_state_t* pstate = mlr_malloc_or_die(sizeof(eval_wrapping_xeval_state_t));
1142 	pstate->pxevaluator = pxevaluator;
1143 
1144 	pevaluator->pvstate       = pstate;
1145 	pevaluator->pprocess_func = eval_wrapping_xeval_func;
1146 	pevaluator->pfree_func    = eval_wrapping_xeval_free;
1147 
1148 	return pevaluator;
1149 }
1150 
1151 // ================================================================
// Looks up a variadic built-in by name ("min" or "max") and allocates its
// evaluator over the given argument evaluators. Returns NULL for any other
// name.
static rval_evaluator_t* fmgr_alloc_evaluator_from_variadic_func_name(char* fnnm, rval_evaluator_t** pargs, int nargs) {
	if (streq(fnnm, "min"))
		return rval_evaluator_alloc_from_variadic_func(variadic_min_func, pargs, nargs);

	if (streq(fnnm, "max"))
		return rval_evaluator_alloc_from_variadic_func(variadic_max_func, pargs, nargs);

	return NULL;
}
1157 
1158 // ================================================================
fmgr_alloc_evaluator_from_zary_func_name(char * function_name)1159 static rval_evaluator_t* fmgr_alloc_evaluator_from_zary_func_name(char* function_name) {
1160 	if        (streq(function_name, "urand")) {
1161 		return rval_evaluator_alloc_from_x_z_func(f_z_urand_func);
1162 	} else if (streq(function_name, "urand32")) {
1163 		return rval_evaluator_alloc_from_x_z_func(i_z_urand32_func);
1164 	} else if (streq(function_name, "systime")) {
1165 		return rval_evaluator_alloc_from_x_z_func(f_z_systime_func);
1166 	} else  {
1167 		return NULL;
1168 	}
1169 }
1170 
1171 // ================================================================
fmgr_alloc_evaluator_from_unary_func_name(char * fnnm,rval_evaluator_t * parg1)1172 static rval_evaluator_t* fmgr_alloc_evaluator_from_unary_func_name(char* fnnm, rval_evaluator_t* parg1)  {
1173 	if        (streq(fnnm, "!"))               { return rval_evaluator_alloc_from_b_b_func(b_b_not_func,         parg1);
1174 	} else if (streq(fnnm, "+"))               { return rval_evaluator_alloc_from_x_x_func(x_x_upos_func,        parg1);
1175 	} else if (streq(fnnm, "-"))               { return rval_evaluator_alloc_from_x_x_func(x_x_uneg_func,        parg1);
1176 	} else if (streq(fnnm, ".+"))              { return rval_evaluator_alloc_from_x_x_func(x_x_upos_func,        parg1);
1177 	} else if (streq(fnnm, ".-"))              { return rval_evaluator_alloc_from_x_x_func(x_x_uneg_func,        parg1);
1178 	} else if (streq(fnnm, "abs"))             { return rval_evaluator_alloc_from_x_x_func(x_x_abs_func,         parg1);
1179 	} else if (streq(fnnm, "acos"))            { return rval_evaluator_alloc_from_f_f_func(f_f_acos_func,        parg1);
1180 	} else if (streq(fnnm, "acosh"))           { return rval_evaluator_alloc_from_f_f_func(f_f_acosh_func,       parg1);
1181 	} else if (streq(fnnm, "asin"))            { return rval_evaluator_alloc_from_f_f_func(f_f_asin_func,        parg1);
1182 	} else if (streq(fnnm, "asinh"))           { return rval_evaluator_alloc_from_f_f_func(f_f_asinh_func,       parg1);
1183 	} else if (streq(fnnm, "atan"))            { return rval_evaluator_alloc_from_f_f_func(f_f_atan_func,        parg1);
1184 	} else if (streq(fnnm, "atanh"))           { return rval_evaluator_alloc_from_f_f_func(f_f_atanh_func,       parg1);
1185 	} else if (streq(fnnm, "bitcount"))        { return rval_evaluator_alloc_from_i_i_func(i_i_bitcount_func,    parg1);
1186 	} else if (streq(fnnm, "boolean"))         { return rval_evaluator_alloc_from_x_x_func(b_x_boolean_func,     parg1);
1187 	} else if (streq(fnnm, "cbrt"))            { return rval_evaluator_alloc_from_f_f_func(f_f_cbrt_func,        parg1);
1188 	} else if (streq(fnnm, "ceil"))            { return rval_evaluator_alloc_from_x_x_func(x_x_ceil_func,        parg1);
1189 	} else if (streq(fnnm, "cos"))             { return rval_evaluator_alloc_from_f_f_func(f_f_cos_func,         parg1);
1190 	} else if (streq(fnnm, "cosh"))            { return rval_evaluator_alloc_from_f_f_func(f_f_cosh_func,        parg1);
1191 	} else if (streq(fnnm, "dhms2fsec"))       { return rval_evaluator_alloc_from_f_s_func(f_s_dhms2fsec_func,   parg1);
1192 	} else if (streq(fnnm, "dhms2sec"))        { return rval_evaluator_alloc_from_f_s_func(i_s_dhms2sec_func,    parg1);
1193 	} else if (streq(fnnm, "erf"))             { return rval_evaluator_alloc_from_f_f_func(f_f_erf_func,         parg1);
1194 	} else if (streq(fnnm, "erfc"))            { return rval_evaluator_alloc_from_f_f_func(f_f_erfc_func,        parg1);
1195 	} else if (streq(fnnm, "exp"))             { return rval_evaluator_alloc_from_f_f_func(f_f_exp_func,         parg1);
1196 	} else if (streq(fnnm, "expm1"))           { return rval_evaluator_alloc_from_f_f_func(f_f_expm1_func,       parg1);
1197 	} else if (streq(fnnm, "float"))           { return rval_evaluator_alloc_from_x_x_func(f_x_float_func,       parg1);
1198 	} else if (streq(fnnm, "floor"))           { return rval_evaluator_alloc_from_x_x_func(x_x_floor_func,       parg1);
1199 	} else if (streq(fnnm, "fsec2dhms"))       { return rval_evaluator_alloc_from_s_f_func(s_f_fsec2dhms_func,   parg1);
1200 	} else if (streq(fnnm, "fsec2hms"))        { return rval_evaluator_alloc_from_s_f_func(s_f_fsec2hms_func,    parg1);
1201 	} else if (streq(fnnm, "gmt2sec"))         { return rval_evaluator_alloc_from_i_s_func(i_s_gmt2sec_func,     parg1);
1202 	} else if (streq(fnnm, "localtime2sec"))   { return rval_evaluator_alloc_from_i_s_func(i_s_localtime2sec_func, parg1);
1203 	} else if (streq(fnnm, "hexfmt"))          { return rval_evaluator_alloc_from_x_x_func(s_x_hexfmt_func,      parg1);
1204 	} else if (streq(fnnm, "hms2fsec"))        { return rval_evaluator_alloc_from_f_s_func(f_s_hms2fsec_func,    parg1);
1205 	} else if (streq(fnnm, "hms2sec"))         { return rval_evaluator_alloc_from_f_s_func(i_s_hms2sec_func,     parg1);
1206 	} else if (streq(fnnm, "int"))             { return rval_evaluator_alloc_from_x_x_func(i_x_int_func,         parg1);
1207 	} else if (streq(fnnm, "invqnorm"))        { return rval_evaluator_alloc_from_f_f_func(f_f_invqnorm_func,    parg1);
1208 	} else if (streq(fnnm, "log"))             { return rval_evaluator_alloc_from_f_f_func(f_f_log_func,         parg1);
1209 	} else if (streq(fnnm, "log10"))           { return rval_evaluator_alloc_from_f_f_func(f_f_log10_func,       parg1);
1210 	} else if (streq(fnnm, "log1p"))           { return rval_evaluator_alloc_from_f_f_func(f_f_log1p_func,       parg1);
1211 	} else if (streq(fnnm, "qnorm"))           { return rval_evaluator_alloc_from_f_f_func(f_f_qnorm_func,       parg1);
1212 	} else if (streq(fnnm, "round"))           { return rval_evaluator_alloc_from_x_x_func(x_x_round_func,       parg1);
1213 	} else if (streq(fnnm, "sec2dhms"))        { return rval_evaluator_alloc_from_s_i_func(s_i_sec2dhms_func,    parg1);
1214 	} else if (streq(fnnm, "sec2gmt"))         { return rval_evaluator_alloc_from_x_x_func(s_x_sec2gmt_func,     parg1);
1215 	} else if (streq(fnnm, "sec2gmtdate"))     { return rval_evaluator_alloc_from_x_x_func(s_x_sec2gmtdate_func, parg1);
1216 	} else if (streq(fnnm, "sec2localtime"))   { return rval_evaluator_alloc_from_x_x_func(s_x_sec2localtime_func, parg1);
1217 	} else if (streq(fnnm, "sec2localdate"))   { return rval_evaluator_alloc_from_x_x_func(s_x_sec2localdate_func, parg1);
1218 	} else if (streq(fnnm, "sec2hms"))         { return rval_evaluator_alloc_from_s_i_func(s_i_sec2hms_func,     parg1);
1219 	} else if (streq(fnnm, "sgn"))             { return rval_evaluator_alloc_from_x_x_func(x_x_sgn_func,         parg1);
1220 	} else if (streq(fnnm, "sin"))             { return rval_evaluator_alloc_from_f_f_func(f_f_sin_func,         parg1);
1221 	} else if (streq(fnnm, "sinh"))            { return rval_evaluator_alloc_from_f_f_func(f_f_sinh_func,        parg1);
1222 	} else if (streq(fnnm, "sqrt"))            { return rval_evaluator_alloc_from_f_f_func(f_f_sqrt_func,        parg1);
1223 	} else if (streq(fnnm, "string"))          { return rval_evaluator_alloc_from_x_x_func(s_x_string_func,      parg1);
1224 	} else if (streq(fnnm, "strlen"))          { return rval_evaluator_alloc_from_i_s_func(i_s_strlen_func,      parg1);
1225 	} else if (streq(fnnm, "tan"))             { return rval_evaluator_alloc_from_f_f_func(f_f_tan_func,         parg1);
1226 	} else if (streq(fnnm, "tanh"))            { return rval_evaluator_alloc_from_f_f_func(f_f_tanh_func,        parg1);
1227 	} else if (streq(fnnm, "tolower"))         { return rval_evaluator_alloc_from_s_s_func(s_s_tolower_func,     parg1);
1228 	} else if (streq(fnnm, "toupper"))         { return rval_evaluator_alloc_from_s_s_func(s_s_toupper_func,     parg1);
1229 	} else if (streq(fnnm, "capitalize"))      { return rval_evaluator_alloc_from_s_s_func(s_s_capitalize_func,  parg1);
1230 	} else if (streq(fnnm, "system"))          { return rval_evaluator_alloc_from_s_s_func(s_s_system_func,      parg1);
1231 	} else if (streq(fnnm, "lstrip"))          { return rval_evaluator_alloc_from_s_s_func(s_s_lstrip_func,      parg1);
1232 	} else if (streq(fnnm, "rstrip"))          { return rval_evaluator_alloc_from_s_s_func(s_s_rstrip_func,      parg1);
1233 	} else if (streq(fnnm, "strip"))           { return rval_evaluator_alloc_from_s_s_func(s_s_strip_func,       parg1);
1234 	} else if (streq(fnnm, "collapse_whitespace")) { return rval_evaluator_alloc_from_s_s_func(s_s_collapse_whitespace_func, parg1);
1235 	} else if (streq(fnnm, "clean_whitespace")) { return rval_evaluator_alloc_from_s_s_func(s_s_clean_whitespace_func, parg1);
1236 	} else if (streq(fnnm, "~"))               { return rval_evaluator_alloc_from_i_i_func(i_i_bitwise_not_func, parg1);
1237 
1238 	} else return NULL;
1239 }
1240 
1241 // ================================================================
// Dispatches a two-argument builtin function or operator name to the
// matching rval_evaluator_alloc_from_* call over the two argument
// evaluators, and returns that call's result. Returns NULL when fnnm is
// not one of the names handled here. Names are compared exactly with streq.
static rval_evaluator_t* fmgr_alloc_evaluator_from_binary_func_name(char* fnnm,
	rval_evaluator_t* parg1, rval_evaluator_t* parg2)
{
	// Boolean connectives
	if (streq(fnnm, "&&")) { return rval_evaluator_alloc_from_b_bb_and_func(parg1, parg2); }
	if (streq(fnnm, "||")) { return rval_evaluator_alloc_from_b_bb_or_func(parg1, parg2); }
	if (streq(fnnm, "^^")) { return rval_evaluator_alloc_from_b_bb_xor_func(parg1, parg2); }

	// Regex functions using the *_no_precomp_func variants (both operands are evaluators)
	if (streq(fnnm, "=~")) { return rval_evaluator_alloc_from_x_ssc_func(matches_no_precomp_func, parg1, parg2); }
	if (streq(fnnm, "regextract")) { return rval_evaluator_alloc_from_x_ss_func(regextract_no_precomp_func, parg1, parg2); }
	if (streq(fnnm, "!=~")) { return rval_evaluator_alloc_from_x_ssc_func(does_not_match_no_precomp_func, parg1, parg2); }

	// Comparison operators
	if (streq(fnnm, "==")) { return rval_evaluator_alloc_from_x_xx_func(eq_op_func, parg1, parg2); }
	if (streq(fnnm, "!=")) { return rval_evaluator_alloc_from_x_xx_func(ne_op_func, parg1, parg2); }
	if (streq(fnnm, ">"))  { return rval_evaluator_alloc_from_x_xx_func(gt_op_func, parg1, parg2); }
	if (streq(fnnm, ">=")) { return rval_evaluator_alloc_from_x_xx_func(ge_op_func, parg1, parg2); }
	if (streq(fnnm, "<"))  { return rval_evaluator_alloc_from_x_xx_func(lt_op_func, parg1, parg2); }
	if (streq(fnnm, "<=")) { return rval_evaluator_alloc_from_x_xx_func(le_op_func, parg1, parg2); }

	// Dot operator
	if (streq(fnnm, ".")) { return rval_evaluator_alloc_from_x_xx_func(s_xx_dot_func, parg1, parg2); }

	// Arithmetic operators
	if (streq(fnnm, "+"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_plus_func, parg1, parg2); }
	if (streq(fnnm, "-"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_minus_func, parg1, parg2); }
	if (streq(fnnm, "*"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_times_func, parg1, parg2); }
	if (streq(fnnm, "/"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_divide_func, parg1, parg2); }
	if (streq(fnnm, "//")) { return rval_evaluator_alloc_from_x_xx_func(x_xx_int_divide_func, parg1, parg2); }

	// Dot-prefixed arithmetic operators
	if (streq(fnnm, ".+"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_oplus_func, parg1, parg2); }
	if (streq(fnnm, ".-"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_ominus_func, parg1, parg2); }
	if (streq(fnnm, ".*"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_otimes_func, parg1, parg2); }
	if (streq(fnnm, "./"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_odivide_func, parg1, parg2); }
	if (streq(fnnm, ".//")) { return rval_evaluator_alloc_from_x_xx_func(x_xx_int_odivide_func, parg1, parg2); }

	// Modulus, powers, and other math; "**" and "pow" share one implementation
	if (streq(fnnm, "%"))      { return rval_evaluator_alloc_from_x_xx_func(x_xx_mod_func, parg1, parg2); }
	if (streq(fnnm, "**"))     { return rval_evaluator_alloc_from_f_ff_func(f_ff_pow_func, parg1, parg2); }
	if (streq(fnnm, "pow"))    { return rval_evaluator_alloc_from_f_ff_func(f_ff_pow_func, parg1, parg2); }
	if (streq(fnnm, "atan2"))  { return rval_evaluator_alloc_from_f_ff_func(f_ff_atan2_func, parg1, parg2); }
	if (streq(fnnm, "roundm")) { return rval_evaluator_alloc_from_x_xx_func(x_xx_roundm_func, parg1, parg2); }

	// Formatting, random integers, and seconds-to-time conversions
	if (streq(fnnm, "fmtnum"))        { return rval_evaluator_alloc_from_s_xs_func(s_xs_fmtnum_func, parg1, parg2); }
	if (streq(fnnm, "urandint"))      { return rval_evaluator_alloc_from_i_ii_func(i_ii_urandint_func, parg1, parg2); }
	if (streq(fnnm, "sec2gmt"))       { return rval_evaluator_alloc_from_x_xi_func(s_xi_sec2gmt_func, parg1, parg2); }
	if (streq(fnnm, "sec2localtime")) { return rval_evaluator_alloc_from_x_xi_func(s_xi_sec2localtime_func, parg1, parg2); }

	// Bitwise operators
	if (streq(fnnm, "&"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_band_func, parg1, parg2); }
	if (streq(fnnm, "|"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_bor_func, parg1, parg2); }
	if (streq(fnnm, "^"))  { return rval_evaluator_alloc_from_x_xx_func(x_xx_bxor_func, parg1, parg2); }
	if (streq(fnnm, "<<")) { return rval_evaluator_alloc_from_i_ii_func(i_ii_bitwise_lsh_func, parg1, parg2); }
	if (streq(fnnm, ">>")) { return rval_evaluator_alloc_from_i_ii_func(i_ii_bitwise_rsh_func, parg1, parg2); }

	// Time formatting and parsing
	if (streq(fnnm, "strftime"))       { return rval_evaluator_alloc_from_x_ns_func(s_ns_strftime_func, parg1, parg2); }
	if (streq(fnnm, "strftime_local")) { return rval_evaluator_alloc_from_x_ns_func(s_ns_strftime_local_func, parg1, parg2); }
	if (streq(fnnm, "strptime"))       { return rval_evaluator_alloc_from_x_ss_func(i_ss_strptime_func, parg1, parg2); }
	if (streq(fnnm, "strptime_local")) { return rval_evaluator_alloc_from_x_ss_func(i_ss_strptime_local_func, parg1, parg2); }

	// Remaining entries
	if (streq(fnnm, "urandrange")) { return rval_evaluator_alloc_from_f_ff_func(f_ff_urandrange_func, parg1, parg2); }
	if (streq(fnnm, "truncate"))   { return rval_evaluator_alloc_from_s_si_func(s_si_truncate_func, parg1, parg2); }

	// No binary builtin with this name
	return NULL;
}
1295 
// Dispatcher for two-argument builtins whose second argument is supplied as
// a regex string plus an ignore-case flag rather than as an evaluator; the
// *_precomp_func variants are used. Returns the matching allocator's
// result, or NULL when fnnm is not handled here.
static rval_evaluator_t* fmgr_alloc_evaluator_from_binary_regex_arg2_func_name(char* fnnm,
	rval_evaluator_t* parg1, char* regex_string, int ignore_case)
{
	if (streq(fnnm, "=~")) {
		return rval_evaluator_alloc_from_x_sr_func(matches_precomp_func,
			parg1, regex_string, ignore_case);
	}

	if (streq(fnnm, "!=~")) {
		return rval_evaluator_alloc_from_x_sr_func(does_not_match_precomp_func,
			parg1, regex_string, ignore_case);
	}

	if (streq(fnnm, "regextract")) {
		return rval_evaluator_alloc_from_x_se_func(regextract_precomp_func,
			parg1, regex_string, ignore_case);
	}

	return NULL;
}
1307 
1308 // ================================================================
// Dispatches a three-argument builtin function name (or the "? :" ternary
// operator) to the matching rval_evaluator_alloc_from_* call over the three
// argument evaluators. Returns that call's result, or NULL when fnnm is not
// one of the names handled here.
static rval_evaluator_t* fmgr_alloc_evaluator_from_ternary_func_name(char* fnnm,
	rval_evaluator_t* parg1, rval_evaluator_t* parg2, rval_evaluator_t* parg3)
{
	// String substitution/extraction; the regex-based ones use the *_no_precomp_func variants
	if (streq(fnnm, "sub")) {
		return rval_evaluator_alloc_from_s_sss_func(sub_no_precomp_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "gsub")) {
		return rval_evaluator_alloc_from_s_sss_func(gsub_no_precomp_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "ssub")) {
		return rval_evaluator_alloc_from_s_sss_func(s_sss_ssub_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "regextract_or_else")) {
		return rval_evaluator_alloc_from_s_sss_func(regextract_or_else_no_precomp_func, parg1, parg2, parg3);
	}

	// Floating-point
	if (streq(fnnm, "logifit")) {
		return rval_evaluator_alloc_from_f_fff_func(f_fff_logifit_func, parg1, parg2, parg3);
	}

	// Modular integer arithmetic
	if (streq(fnnm, "madd")) {
		return rval_evaluator_alloc_from_i_iii_func(i_iii_modadd_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "msub")) {
		return rval_evaluator_alloc_from_i_iii_func(i_iii_modsub_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "mmul")) {
		return rval_evaluator_alloc_from_i_iii_func(i_iii_modmul_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "mexp")) {
		return rval_evaluator_alloc_from_i_iii_func(i_iii_modexp_func, parg1, parg2, parg3);
	}

	// Substring
	if (streq(fnnm, "substr")) {
		return rval_evaluator_alloc_from_s_sii_func(s_sii_substr_func, parg1, parg2, parg3);
	}

	// Ternary operator
	if (streq(fnnm, "? :")) {
		return rval_evaluator_alloc_from_ternop(parg1, parg2, parg3);
	}

	return NULL;
}
1336 
// Dispatcher for three-argument builtins whose second argument is supplied
// as a regex string plus an ignore-case flag rather than as an evaluator;
// the *_precomp_func variants are used. Returns the matching allocator's
// result, or NULL when fnnm is not handled here.
static rval_evaluator_t* fmgr_alloc_evaluator_from_ternary_regex_arg2_func_name(char* fnnm,
	rval_evaluator_t* parg1, char* regex_string, int ignore_case, rval_evaluator_t* parg3)
{
	if (streq(fnnm, "sub")) {
		return rval_evaluator_alloc_from_x_srs_func(sub_precomp_func,
			parg1, regex_string, ignore_case, parg3);
	}

	if (streq(fnnm, "gsub")) {
		return rval_evaluator_alloc_from_x_srs_func(gsub_precomp_func,
			parg1, regex_string, ignore_case, parg3);
	}

	if (streq(fnnm, "regextract_or_else")) {
		return rval_evaluator_alloc_from_x_ses_func(regextract_or_else_precomp_func,
			parg1, regex_string, ignore_case, parg3);
	}

	return NULL;
}
1348 
1349 // ================================================================
construct_builtin_function_callsite_xevaluator(fmgr_t * pfmgr,unresolved_func_callsite_state_t * pcallsite)1350 static rxval_evaluator_t* construct_builtin_function_callsite_xevaluator(
1351 	fmgr_t* pfmgr,
1352 	unresolved_func_callsite_state_t* pcallsite)
1353 {
1354 	char* function_name       = pcallsite->function_name;
1355 	int   user_provided_arity = pcallsite->arity;
1356 	int   type_inferencing    = pcallsite->type_inferencing;
1357 	int   context_flags       = pcallsite->context_flags;
1358 	mlr_dsl_ast_node_t* pnode = pcallsite->pnode;
1359 
1360 	int variadic = FALSE;
1361 	fmgr_check_arity_with_report(pfmgr, function_name, user_provided_arity, &variadic);
1362 
1363 	rxval_evaluator_t* pxevaluator = NULL;
1364 	if (variadic) {
1365 		pxevaluator = fmgr_alloc_xevaluator_from_variadic_func_name(function_name, pnode->pchildren,
1366 			pfmgr, type_inferencing, context_flags);
1367 
1368 	} else if (user_provided_arity == 1) {
1369 		mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
1370 		pxevaluator = fmgr_alloc_xevaluator_from_unary_func_name(function_name, parg1_node,
1371 			pfmgr, type_inferencing, context_flags);
1372 
1373 	} else if (user_provided_arity == 2) {
1374 		mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
1375 		mlr_dsl_ast_node_t* parg2_node = pnode->pchildren->phead->pnext->pvvalue;
1376 		pxevaluator = fmgr_alloc_xevaluator_from_binary_func_name(function_name, parg1_node, parg2_node,
1377 			pfmgr, type_inferencing, context_flags);
1378 
1379 	} else if (user_provided_arity == 3) {
1380 		mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
1381 		mlr_dsl_ast_node_t* parg2_node = pnode->pchildren->phead->pnext->pvvalue;
1382 		mlr_dsl_ast_node_t* parg3_node = pnode->pchildren->phead->pnext->pnext->pvvalue;
1383 		pxevaluator = fmgr_alloc_xevaluator_from_ternary_func_name(function_name, parg1_node, parg2_node, parg3_node,
1384 			pfmgr, type_inferencing, context_flags);
1385 	}
1386 
1387 	return pxevaluator;
1388 }
1389 
1390 // ----------------------------------------------------------------
fmgr_alloc_xevaluator_from_variadic_func_name(char * function_name,sllv_t * parg_nodes,fmgr_t * pfmgr,int type_inferencing,int context_flags)1391 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_variadic_func_name(
1392 	char*               function_name,
1393 	sllv_t*             parg_nodes,
1394 	fmgr_t*             pfmgr,
1395 	int                 type_inferencing,
1396 	int                 context_flags)
1397 {
1398 	if (streq(function_name, "mapsum")) {
1399 		return rxval_evaluator_alloc_from_variadic_func(variadic_mapsum_xfunc, parg_nodes,
1400 			pfmgr, type_inferencing, context_flags);
1401 	} else if (streq(function_name, "mapdiff")) {
1402 		return rxval_evaluator_alloc_from_variadic_func(variadic_mapdiff_xfunc, parg_nodes,
1403 			pfmgr, type_inferencing, context_flags);
1404 	} else if (streq(function_name, "mapexcept")) {
1405 		return rxval_evaluator_alloc_from_variadic_func(variadic_mapexcept_xfunc, parg_nodes,
1406 			pfmgr, type_inferencing, context_flags);
1407 	} else if (streq(function_name, "mapselect")) {
1408 		return rxval_evaluator_alloc_from_variadic_func(variadic_mapselect_xfunc, parg_nodes,
1409 			pfmgr, type_inferencing, context_flags);
1410 	} else {
1411 		return NULL;
1412 	}
1413 }
1414 
1415 // ----------------------------------------------------------------
fmgr_alloc_xevaluator_from_unary_func_name(char * fnnm,mlr_dsl_ast_node_t * parg1,fmgr_t * pf,int ti,int cf)1416 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_unary_func_name(char* fnnm,
1417 	mlr_dsl_ast_node_t* parg1,
1418 	fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/)
1419 {
1420 
1421 	if (streq(fnnm, "asserting_absent")) {
1422 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_absent_no_free_xfunc, parg1, pf, ti, cf, "absent");
1423 	} else if (streq(fnnm, "asserting_bool")) {
1424 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_boolean_no_free_xfunc, parg1, pf, ti, cf, "boolean");
1425 	} else if (streq(fnnm, "asserting_boolean")) {
1426 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_boolean_no_free_xfunc, parg1, pf, ti, cf, "boolean");
1427 	} else if (streq(fnnm, "asserting_empty")) {
1428 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_empty_no_free_xfunc, parg1, pf, ti, cf, "empty");
1429 	} else if (streq(fnnm, "asserting_empty_map")) {
1430 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_empty_map_no_free_xfunc, parg1, pf, ti, cf, "empty_map");
1431 	} else if (streq(fnnm, "asserting_float")) {
1432 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_float_no_free_xfunc, parg1, pf, ti, cf, "float");
1433 	} else if (streq(fnnm, "asserting_int")) {
1434 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_int_no_free_xfunc, parg1, pf, ti, cf, "int");
1435 	} else if (streq(fnnm, "asserting_map")) {
1436 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_map_no_free_xfunc, parg1, pf, ti, cf, "map");
1437 	} else if (streq(fnnm, "asserting_nonempty_map")) {
1438 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_nonempty_map_no_free_xfunc, parg1, pf, ti, cf,
1439 			"nonempty_map");
1440 	} else if (streq(fnnm, "asserting_not_empty")) {
1441 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_not_empty_no_free_xfunc, parg1, pf, ti, cf, "not_empty");
1442 	} else if (streq(fnnm, "asserting_not_map")) {
1443 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_not_map_no_free_xfunc, parg1, pf, ti, cf, "not_map");
1444 	} else if (streq(fnnm, "asserting_not_null")) {
1445 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_not_null_no_free_xfunc, parg1, pf, ti, cf, "not_null");
1446 	} else if (streq(fnnm, "asserting_null")) {
1447 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_null_no_free_xfunc, parg1, pf, ti, cf, "null");
1448 	} else if (streq(fnnm, "asserting_numeric")) {
1449 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_numeric_no_free_xfunc, parg1, pf, ti, cf, "numeric");
1450 	} else if (streq(fnnm, "asserting_present")) {
1451 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_present_no_free_xfunc, parg1, pf, ti, cf, "present");
1452 	} else if (streq(fnnm, "asserting_string")) {
1453 		return rxval_evaluator_alloc_from_A_x_func(b_x_is_string_no_free_xfunc, parg1, pf, ti, cf, "string");
1454 
1455 	} else if (streq(fnnm, "is_absent")) {
1456 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_absent_xfunc, parg1, pf, ti, cf);
1457 	} else if (streq(fnnm, "is_bool")) {
1458 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_boolean_xfunc, parg1, pf, ti, cf);
1459 	} else if (streq(fnnm, "is_boolean")) {
1460 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_boolean_xfunc, parg1, pf, ti, cf);
1461 	} else if (streq(fnnm, "is_empty")) {
1462 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_empty_xfunc, parg1, pf, ti, cf);
1463 	} else if (streq(fnnm, "is_empty_map")) {
1464 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_empty_map_xfunc, parg1, pf, ti, cf);
1465 	} else if (streq(fnnm, "is_float")) {
1466 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_float_xfunc, parg1, pf, ti, cf);
1467 	} else if (streq(fnnm, "is_int")) {
1468 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_int_xfunc, parg1, pf, ti, cf);
1469 	} else if (streq(fnnm, "is_map")) {
1470 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_map_xfunc, parg1, pf, ti, cf);
1471 	} else if (streq(fnnm, "is_nonempty_map")) {
1472 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_nonempty_map_xfunc, parg1, pf, ti, cf);
1473 	} else if (streq(fnnm, "is_not_empty")) {
1474 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_not_empty_xfunc, parg1, pf, ti, cf);
1475 	} else if (streq(fnnm, "is_not_map")) {
1476 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_not_map_xfunc, parg1, pf, ti, cf);
1477 	} else if (streq(fnnm, "is_not_null")) {
1478 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_not_null_xfunc, parg1, pf, ti, cf);
1479 	} else if (streq(fnnm, "is_null")) {
1480 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_null_xfunc, parg1, pf, ti, cf);
1481 	} else if (streq(fnnm, "is_numeric")) {
1482 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_numeric_xfunc, parg1, pf, ti, cf);
1483 	} else if (streq(fnnm, "is_present")) {
1484 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_present_xfunc, parg1, pf, ti, cf);
1485 	} else if (streq(fnnm, "is_string")) {
1486 		return rxval_evaluator_alloc_from_x_x_func(b_x_is_string_xfunc, parg1, pf, ti, cf);
1487 
1488 	} else if (streq(fnnm, "typeof")) {
1489 		return rxval_evaluator_alloc_from_x_x_func(s_x_typeof_xfunc, parg1, pf, ti, cf);
1490 	} else if (streq(fnnm, "length")) {
1491 		return rxval_evaluator_alloc_from_x_x_func(i_x_length_xfunc, parg1, pf, ti, cf);
1492 	} else if (streq(fnnm, "depth")) {
1493 		return rxval_evaluator_alloc_from_x_x_func(i_x_depth_xfunc, parg1, pf, ti, cf);
1494 	} else if (streq(fnnm, "leafcount")) {
1495 		return rxval_evaluator_alloc_from_x_x_func(i_x_leafcount_xfunc, parg1, pf, ti, cf);
1496 	} else {
1497 		return NULL;
1498 	}
1499 }
1500 
1501 // ----------------------------------------------------------------
fmgr_alloc_xevaluator_from_binary_func_name(char * fnnm,mlr_dsl_ast_node_t * parg1,mlr_dsl_ast_node_t * parg2,fmgr_t * pf,int ti,int cf)1502 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_binary_func_name(char* fnnm,
1503 	mlr_dsl_ast_node_t* parg1, mlr_dsl_ast_node_t* parg2,
1504 	fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/)
1505 {
1506 	if (streq(fnnm, "haskey")) {
1507 		return rxval_evaluator_alloc_from_x_mx_func(b_xx_haskey_xfunc, parg1, parg2, pf, ti, cf);
1508 	} else if (streq(fnnm, "splitnv")) {
1509 		return rxval_evaluator_alloc_from_x_ss_func(m_ss_splitnv_xfunc, parg1, parg2, pf, ti, cf);
1510 	} else if (streq(fnnm, "splitnvx")) {
1511 		return rxval_evaluator_alloc_from_x_ss_func(m_ss_splitnvx_xfunc, parg1, parg2, pf, ti, cf);
1512 	} else if (streq(fnnm, "joink")) {
1513 		return rxval_evaluator_alloc_from_x_ms_func(s_ms_joink_xfunc, parg1, parg2, pf, ti, cf);
1514 	} else if (streq(fnnm, "joinv")) {
1515 		return rxval_evaluator_alloc_from_x_ms_func(s_ms_joinv_xfunc, parg1, parg2, pf, ti, cf);
1516 	} else {
1517 		return NULL;
1518 	}
1519 }
1520 
1521 // ----------------------------------------------------------------
fmgr_alloc_xevaluator_from_ternary_func_name(char * fnnm,mlr_dsl_ast_node_t * parg1,mlr_dsl_ast_node_t * parg2,mlr_dsl_ast_node_t * parg3,fmgr_t * pf,int ti,int cf)1522 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_ternary_func_name(char* fnnm,
1523 	mlr_dsl_ast_node_t* parg1, mlr_dsl_ast_node_t* parg2, mlr_dsl_ast_node_t* parg3,
1524 	fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/)
1525 {
1526 	if (streq(fnnm, "joinkv")) {
1527 		return rxval_evaluator_alloc_from_x_mss_func(s_mss_joinkv_xfunc, parg1, parg2, parg3, pf, ti, cf);
1528 	} else if (streq(fnnm, "splitkv")) {
1529 		return rxval_evaluator_alloc_from_x_sss_func(m_sss_splitkv_xfunc, parg1, parg2, parg3, pf, ti, cf);
1530 	} else if (streq(fnnm, "splitkvx")) {
1531 		return rxval_evaluator_alloc_from_x_sss_func(m_sss_splitkvx_xfunc, parg1, parg2, parg3, pf, ti, cf);
1532 	} else {
1533 		return NULL;
1534 	}
1535 }
1536 
1537 // ================================================================
1538 // Return value is in scalar context.
// Resolves a not-yet-bound function callsite in place: *pev is overwritten
// by struct assignment with the resolved evaluator, and the temporary
// evaluator shell that was copied from is freed.
//
// Candidates are tried in this order:
//  1. a user-defined function;
//  2. a builtin xevaluator, wrapped by fmgr_alloc_eval_wrapping_xeval;
//  3. a builtin scalar evaluator.
// If none of them yields an evaluator, an error naming the function is
// written to stderr and the process exits with status 1.
static void resolve_func_callsite(fmgr_t* pfmgr, rval_evaluator_t* pev) {
	unresolved_func_callsite_state_t* pstate = pev->pvstate;

	rval_evaluator_t* presolved = construct_udf_callsite_evaluator(pfmgr, pstate);

	if (presolved == NULL) {
		// Not a UDF. Builtins come in two flavors: xevaluators (some
		// argument and/or the return value is a map) and plain evaluators
		// (scalars only). The xevaluator flavor is tried first.
		rxval_evaluator_t* pxresolved = construct_builtin_function_callsite_xevaluator(pfmgr, pstate);
		if (pxresolved != NULL) {
			presolved = fmgr_alloc_eval_wrapping_xeval(pxresolved);
			*pev = *presolved;
			free(presolved);
			return;
		}

		presolved = construct_builtin_function_callsite_evaluator(pfmgr, pstate);
	}

	if (presolved == NULL) {
		fprintf(stderr, "Miller: unrecognized function name \"%s\".\n", pstate->function_name);
		exit(1);
	}

	// Struct assignment into the callsite space; only the shell is freed.
	*pev = *presolved;
	free(presolved);
}
1573 
1574 // ----------------------------------------------------------------
1575 // Return value is in map context.
// Resolves a not-yet-bound function callsite in place: *pxev is overwritten
// by struct assignment with the resolved xevaluator, and the temporary
// xevaluator shell that was copied from is freed.
//
// Candidates are tried in this order:
//  1. a user-defined function;
//  2. a builtin xevaluator;
//  3. a builtin scalar evaluator, wrapped by fmgr_alloc_xeval_wrapping_eval.
// If none of them yields an evaluator, an error naming the function is
// written to stderr and the process exits with status 1.
static void resolve_func_xcallsite(fmgr_t* pfmgr, rxval_evaluator_t* pxev) {
	unresolved_func_callsite_state_t* pcallsite = pxev->pvstate;

	rxval_evaluator_t* pxevaluator = construct_udf_defsite_xevaluator(pfmgr, pcallsite);
	if (pxevaluator != NULL) {
		// Struct assignment into the callsite space; only the shell is freed.
		*pxev = *pxevaluator;
		free(pxevaluator);
		return;
	}

	pxevaluator = construct_builtin_function_callsite_xevaluator(pfmgr, pcallsite);
	if (pxevaluator != NULL) {
		*pxev = *pxevaluator;
		free(pxevaluator);
		return;
	}

	// Only wrap a scalar evaluator that was actually found. Wrapping a NULL
	// lookup result could yield a non-NULL wrapper around nothing, which
	// would bypass the diagnostic below. This mirrors the NULL check done
	// before wrapping in resolve_func_callsite.
	// NOTE(review): fmgr_alloc_xeval_wrapping_eval is not visible here;
	// confirm whether it tolerates a NULL argument.
	rval_evaluator_t* pevaluator = construct_builtin_function_callsite_evaluator(pfmgr, pcallsite);
	if (pevaluator != NULL) {
		pxevaluator = fmgr_alloc_xeval_wrapping_eval(pevaluator);
		if (pxevaluator != NULL) {
			*pxev = *pxevaluator;
			free(pxevaluator);
			return;
		}
	}

	fprintf(stderr, "Miller: unrecognized function name \"%s\".\n", pcallsite->function_name);
	exit(1);
}
1605