1 #include "lib/mlr_globals.h"
2 #include "lib/mlrutil.h"
3 #include "dsl/function_manager.h"
4 #include "dsl/context_flags.h"
5 #include "dsl/rval_evaluators.h"
6 #include "dsl/rxval_evaluators.h"
7
8 // ----------------------------------------------------------------
// Help-output category of a built-in function. Each row of the function table
// carries one of these; function_class_to_desc() maps them to display names.
// Enumerator values are implicit and sequential, starting at zero.
typedef enum _func_class_t {
	FUNC_CLASS_ARITHMETIC, // displayed as "arithmetic"
	FUNC_CLASS_MATH,       // displayed as "math"
	FUNC_CLASS_BOOLEAN,    // displayed as "boolean"
	FUNC_CLASS_STRING,     // displayed as "string"
	FUNC_CLASS_CONVERSION, // displayed as "conversion"
	FUNC_CLASS_TYPING,     // displayed as "typing"
	FUNC_CLASS_MAPS,       // displayed as "maps"
	FUNC_CLASS_TIME        // displayed as "time"
} func_class_t;
19
// Outcome of looking up a (function name, argument count) pair in the
// built-in-function table; see check_arity().
typedef enum _arity_check_t {
	ARITY_CHECK_PASS,   // name found and the argument count is acceptable
	ARITY_CHECK_FAIL,   // name found but the argument count is not acceptable
	ARITY_CHECK_NO_SUCH // name does not appear in the table at all
} arity_check_t;
25
26 typedef struct _function_lookup_t {
27 func_class_t function_class;
28 char* function_name;
29 int arity; // for variadic, this is minimum arity
30 int variadic;
31 char* usage_string;
32 } function_lookup_t;
33
34 // This is shared between all instances
35 static function_lookup_t FUNCTION_LOOKUP_TABLE[];
36
37 // ----------------------------------------------------------------
38 // See also comments in rval_evaluators.h
39
40 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
41 static void fmgr_check_arity_with_report(fmgr_t* pfmgr, char* function_name,
42 int user_provided_arity, int* pvariadic);
43
44 static rval_evaluator_t* fmgr_alloc_evaluator_from_variadic_func_name(
45 char* function_name, rval_evaluator_t** pargs, int nargs);
46
47 static rval_evaluator_t* fmgr_alloc_evaluator_from_zary_func_name(
48 char* function_name);
49
50 static rval_evaluator_t* fmgr_alloc_evaluator_from_unary_func_name(
51 char* function_name, rval_evaluator_t* parg1);
52
53 static rval_evaluator_t* fmgr_alloc_evaluator_from_binary_func_name(
54 char* function_name,
55 rval_evaluator_t* parg1, rval_evaluator_t* parg2);
56
57 static rval_evaluator_t* fmgr_alloc_evaluator_from_binary_regex_arg2_func_name(
58 char* function_name,
59 rval_evaluator_t* parg1, char* regex_string, int ignore_case);
60
61 static rval_evaluator_t* fmgr_alloc_evaluator_from_ternary_func_name(
62 char* function_name,
63 rval_evaluator_t* parg1, rval_evaluator_t* parg2, rval_evaluator_t* parg3);
64
65 static rval_evaluator_t* fmgr_alloc_evaluator_from_ternary_regex_arg2_func_name(
66 char* function_name,
67 rval_evaluator_t* parg1, char* regex_string, int ignore_case, rval_evaluator_t* parg3);
68
69 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
70 // For rval functions, we pass rval_evaluator_t* (CST); for rxval functions, we pass
71 // mlr_dsl_ast_node_t* (AST). It's easy to construct the former from the latter, of
72 // course. The difference is that we look up map-enabled functions by name first,
73 // then non-map-enabled functions by name second.
74 //
75 // * AST nodes are passed to try to look up a map-enabled function given a function name.
76 // * If those exist, they construct CST structures and return.
77 // * But if not, we look up a non-map-enabled function for the same function name.
78 // * If that doesn't exist either, then it's a fatal error. So we go ahead and
79 // construct an rval_evaluator_t* CST structure from the AST node simply to
80 // save keystrokes, passing that to the function-lookup routines.
81 //
// It would be simpler to always construct CST structures before looking up
83 // function names, but the only problem is that it's hard to unconstruct CST
84 // structures in case the name lookup fails. (The function-manager
85 // as-yet-unresolved-name list points into them, whenever function arguments
86 // themselves include function calls). Namely, the following scenario is to be
87 // avoided:
88 //
89 // * Construct rxval_evaluator_t* CST structure.
90 // * Look up map-enabled function with a given name.
91 // * That doesn't exist.
92 // * Now the rxval_evaluator_t* can't be torn down since the fmgr points into it.
93
94 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_variadic_func_name(
95 char* function_name, sllv_t* parg_nodes,
96 fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/);
97
98 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_unary_func_name(
99 char* function_name,
100 mlr_dsl_ast_node_t* parg1,
101 fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/);
102
103 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_binary_func_name(
104 char* function_name,
105 mlr_dsl_ast_node_t* parg1, mlr_dsl_ast_node_t* pargs2,
106 fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/);
107
108 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_ternary_func_name(
109 char* function_name,
110 mlr_dsl_ast_node_t* parg1, mlr_dsl_ast_node_t* pargs2, mlr_dsl_ast_node_t* pargs3,
111 fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/);
112
113 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
114 static void resolve_func_callsite(fmgr_t* pfmgr, rval_evaluator_t* pev);
115 static void resolve_func_xcallsite(fmgr_t* pfmgr, rxval_evaluator_t* pxev);
116 static rxval_evaluator_t* fmgr_alloc_xeval_wrapping_eval(rval_evaluator_t* pevaluator);
117 static rval_evaluator_t* fmgr_alloc_eval_wrapping_xeval(rxval_evaluator_t* pxevaluator);
118
119 // ----------------------------------------------------------------
fmgr_alloc()120 fmgr_t* fmgr_alloc() {
121 fmgr_t* pfmgr = mlr_malloc_or_die(sizeof(fmgr_t));
122
123 pfmgr->function_lookup_table = &FUNCTION_LOOKUP_TABLE[0];
124
125 pfmgr->built_in_function_names = hss_alloc();
126 for (int i = 0; ; i++) {
127 function_lookup_t* plookup = &pfmgr->function_lookup_table[i];
128 char* fname = plookup->function_name;
129 if (fname == NULL)
130 break;
131 hss_add(pfmgr->built_in_function_names, fname);
132 }
133
134 pfmgr->pudf_names_to_defsite_states = lhmsv_alloc();
135
136 pfmgr->pfunc_callsite_evaluators_to_resolve = sllv_alloc();
137 pfmgr->pfunc_callsite_xevaluators_to_resolve = sllv_alloc();
138
139 return pfmgr;
140 }
141
142 // ----------------------------------------------------------------
// Releases a function manager and everything it owns. A NULL manager is a
// no-op.
//
// For each installed UDF this frees the defsite's name, invokes the defsite's
// own pfree_func on its state (passing pctx through), and frees the defsite
// record itself; then the containers and the manager are freed.
void fmgr_free(fmgr_t* pfmgr, context_t* pctx) {
	if (pfmgr == NULL)
		return;

	lhmsve_t* pentry = pfmgr->pudf_names_to_defsite_states->phead;
	while (pentry != NULL) {
		udf_defsite_state_t* pstate = pentry->pvvalue;
		free(pstate->name);
		pstate->pfree_func(pstate->pvstate, pctx);
		free(pstate);
		pentry = pentry->pnext;
	}

	lhmsv_free(pfmgr->pudf_names_to_defsite_states);
	sllv_free(pfmgr->pfunc_callsite_evaluators_to_resolve);
	sllv_free(pfmgr->pfunc_callsite_xevaluators_to_resolve);
	hss_free(pfmgr->built_in_function_names);
	free(pfmgr);
}
159
160 // ----------------------------------------------------------------
// Registers a user-defined function with the manager.
//
// The UDF's name must not match any built-in function name, nor the name of a
// UDF installed earlier; either collision prints a message to stderr and exits
// the process with status 1. On success the defsite state is stored in the map
// under a freshly duplicated copy of its name (the map frees that key).
void fmgr_install_udf(fmgr_t* pfmgr, udf_defsite_state_t* pdefsite_state) {
	char* udf_name = pdefsite_state->name;

	if (hss_has(pfmgr->built_in_function_names, udf_name)) {
		fprintf(stderr, "%s: function named \"%s\" must not override a built-in function of the same name.\n",
			MLR_GLOBALS.bargv0, udf_name);
		exit(1);
	}

	if (lhmsv_get(pfmgr->pudf_names_to_defsite_states, udf_name) != NULL) {
		fprintf(stderr, "%s: function named \"%s\" has already been defined.\n",
			MLR_GLOBALS.bargv0, udf_name);
		exit(1);
	}

	lhmsv_put(pfmgr->pudf_names_to_defsite_states,
		mlr_strdup_or_die(udf_name),
		pdefsite_state,
		FREE_ENTRY_KEY);
}
175
176 // ================================================================
177 static function_lookup_t FUNCTION_LOOKUP_TABLE[] = {
178
179 {FUNC_CLASS_ARITHMETIC, "+", 2,0, "Addition."},
180 {FUNC_CLASS_ARITHMETIC, "+", 1,0, "Unary plus."},
181 {FUNC_CLASS_ARITHMETIC, "-", 2,0, "Subtraction."},
182 {FUNC_CLASS_ARITHMETIC, "-", 1,0, "Unary minus."},
183 {FUNC_CLASS_ARITHMETIC, "*", 2,0, "Multiplication."},
184 {FUNC_CLASS_ARITHMETIC, "/", 2,0, "Division."},
185 {FUNC_CLASS_ARITHMETIC, "//", 2,0, "Integer division: rounds to negative (pythonic)."},
186
187 {FUNC_CLASS_ARITHMETIC, ".+", 2,0, "Addition, with integer-to-integer overflow"},
188 {FUNC_CLASS_ARITHMETIC, ".+", 1,0, "Unary plus, with integer-to-integer overflow."},
189 {FUNC_CLASS_ARITHMETIC, ".-", 2,0, "Subtraction, with integer-to-integer overflow."},
190 {FUNC_CLASS_ARITHMETIC, ".-", 1,0, "Unary minus, with integer-to-integer overflow."},
191 {FUNC_CLASS_ARITHMETIC, ".*", 2,0, "Multiplication, with integer-to-integer overflow."},
192 {FUNC_CLASS_ARITHMETIC, "./", 2,0, "Division, with integer-to-integer overflow."},
193 {FUNC_CLASS_ARITHMETIC, ".//", 2,0, "Integer division: rounds to negative (pythonic), with integer-to-integer overflow."},
194
195 {FUNC_CLASS_ARITHMETIC, "%", 2,0, "Remainder; never negative-valued (pythonic)."},
196 {FUNC_CLASS_ARITHMETIC, "**", 2,0, "Exponentiation; same as pow, but as an infix\noperator."},
197 {FUNC_CLASS_ARITHMETIC, "|", 2,0, "Bitwise OR."},
198 {FUNC_CLASS_ARITHMETIC, "^", 2,0, "Bitwise XOR."},
199 {FUNC_CLASS_ARITHMETIC, "&", 2,0, "Bitwise AND."},
200 {FUNC_CLASS_ARITHMETIC, "~", 1,0,
201 "Bitwise NOT. Beware '$y=~$x' since =~ is the\nregex-match operator: try '$y = ~$x'."},
202 {FUNC_CLASS_ARITHMETIC, "<<", 2,0, "Bitwise left-shift."},
203 {FUNC_CLASS_ARITHMETIC, ">>", 2,0, "Bitwise right-shift."},
204 {FUNC_CLASS_ARITHMETIC, "bitcount", 1,0, "Count of 1-bits"},
205
206 {FUNC_CLASS_BOOLEAN, "==", 2,0, "String/numeric equality. Mixing number and string\nresults in string compare."},
207 {FUNC_CLASS_BOOLEAN, "!=", 2,0, "String/numeric inequality. Mixing number and string\nresults in string compare."},
208 {FUNC_CLASS_BOOLEAN, "=~", 2,0,
209 "String (left-hand side) matches regex (right-hand\n"
210 "side), e.g. '$name =~ \"^a.*b$\"'."},
211 {FUNC_CLASS_BOOLEAN, "!=~", 2,0,
212 "String (left-hand side) does not match regex\n"
213 "(right-hand side), e.g. '$name !=~ \"^a.*b$\"'."},
214 {FUNC_CLASS_BOOLEAN, ">", 2,0,
215 "String/numeric greater-than. Mixing number and string\n"
216 "results in string compare."},
217 {FUNC_CLASS_BOOLEAN, ">=", 2,0,
218 "String/numeric greater-than-or-equals. Mixing number\n"
219 "and string results in string compare."},
220 {FUNC_CLASS_BOOLEAN, "<", 2,0,
221 "String/numeric less-than. Mixing number and string\n"
222 "results in string compare."},
223 {FUNC_CLASS_BOOLEAN, "<=", 2,0,
224 "String/numeric less-than-or-equals. Mixing number\n"
225 "and string results in string compare."},
226 {FUNC_CLASS_BOOLEAN, "&&", 2,0, "Logical AND."},
227 {FUNC_CLASS_BOOLEAN, "||", 2,0, "Logical OR."},
228 {FUNC_CLASS_BOOLEAN, "^^", 2,0, "Logical XOR."},
229 {FUNC_CLASS_BOOLEAN, "!", 1,0, "Logical negation."},
230 {FUNC_CLASS_BOOLEAN, "? :", 3,0, "Ternary operator."},
231
232 {FUNC_CLASS_STRING, ".", 2,0, "String concatenation."},
233 {FUNC_CLASS_STRING, "gsub", 3,0, "Example: '$name=gsub($name, \"old\", \"new\")'\n(replace all)."},
234 {FUNC_CLASS_STRING, "regextract", 2,0, "Example: '$name=regextract($name, \"[A-Z]{3}[0-9]{2}\")'\n."},
235 {FUNC_CLASS_STRING, "regextract_or_else", 3,0, "Example: '$name=regextract_or_else($name, \"[A-Z]{3}[0-9]{2}\", \"default\")'\n."},
236 {FUNC_CLASS_STRING, "strlen", 1,0, "String length."},
237 {FUNC_CLASS_STRING, "sub", 3,0, "Example: '$name=sub($name, \"old\", \"new\")'\n(replace once)."},
238 {FUNC_CLASS_STRING, "ssub", 3,0, "Like sub but does no regexing. No characters are special."},
239 {FUNC_CLASS_STRING, "substr", 3,0,
240 "substr(s,m,n) gives substring of s from 0-up position m to n \n"
241 "inclusive. Negative indices -len .. -1 alias to 0 .. len-1."},
242 {FUNC_CLASS_STRING, "tolower", 1,0, "Convert string to lowercase."},
243 {FUNC_CLASS_STRING, "toupper", 1,0, "Convert string to uppercase."},
244 {FUNC_CLASS_STRING, "truncate", 2,0, "Truncates string first argument to max length of int second argument."},
245 {FUNC_CLASS_STRING, "capitalize", 1,0, "Convert string's first character to uppercase."},
246 {FUNC_CLASS_STRING, "lstrip", 1,0, "Strip leading whitespace from string."},
247 {FUNC_CLASS_STRING, "rstrip", 1,0, "Strip trailing whitespace from string."},
248 {FUNC_CLASS_STRING, "strip", 1,0, "Strip leading and trailing whitespace from string."},
249 {FUNC_CLASS_STRING, "collapse_whitespace", 1,0, "Strip repeated whitespace from string."},
250 {FUNC_CLASS_STRING, "clean_whitespace", 1,0, "Same as collapse_whitespace and strip."},
251 {FUNC_CLASS_STRING, "system", 1,0, "Run command string, yielding its stdout minus final carriage return."},
252
253 {FUNC_CLASS_MATH, "abs", 1,0, "Absolute value."},
254 {FUNC_CLASS_MATH, "acos", 1,0, "Inverse trigonometric cosine."},
255 {FUNC_CLASS_MATH, "acosh", 1,0, "Inverse hyperbolic cosine."},
256 {FUNC_CLASS_MATH, "asin", 1,0, "Inverse trigonometric sine."},
257 {FUNC_CLASS_MATH, "asinh", 1,0, "Inverse hyperbolic sine."},
258 {FUNC_CLASS_MATH, "atan", 1,0, "One-argument arctangent."},
259 {FUNC_CLASS_MATH, "atan2", 2,0, "Two-argument arctangent."},
260 {FUNC_CLASS_MATH, "atanh", 1,0, "Inverse hyperbolic tangent."},
261 {FUNC_CLASS_MATH, "cbrt", 1,0, "Cube root."},
262 {FUNC_CLASS_MATH, "ceil", 1,0, "Ceiling: nearest integer at or above."},
263 {FUNC_CLASS_MATH, "cos", 1,0, "Trigonometric cosine."},
264 {FUNC_CLASS_MATH, "cosh", 1,0, "Hyperbolic cosine."},
265 {FUNC_CLASS_MATH, "erf", 1,0, "Error function."},
266 {FUNC_CLASS_MATH, "erfc", 1,0, "Complementary error function."},
267 {FUNC_CLASS_MATH, "exp", 1,0, "Exponential function e**x."},
268 {FUNC_CLASS_MATH, "expm1", 1,0, "e**x - 1."},
269 {FUNC_CLASS_MATH, "floor", 1,0, "Floor: nearest integer at or below."},
270 // See also http://johnkerl.org/doc/randuv.pdf for more about urand() -> other distributions
271 {FUNC_CLASS_MATH, "invqnorm", 1,0,
272 "Inverse of normal cumulative distribution\n"
273 "function. Note that invqorm(urand()) is normally distributed."},
274 {FUNC_CLASS_MATH, "log", 1,0, "Natural (base-e) logarithm."},
275 {FUNC_CLASS_MATH, "log10", 1,0, "Base-10 logarithm."},
276 {FUNC_CLASS_MATH, "log1p", 1,0, "log(1-x)."},
277 {FUNC_CLASS_MATH, "logifit", 3,0, "Given m and b from logistic regression, compute\nfit: $yhat=logifit($x,$m,$b)."},
278 {FUNC_CLASS_MATH, "madd", 3,0, "a + b mod m (integers)"},
279 {FUNC_CLASS_MATH, "max", 0,1, "max of n numbers; null loses"},
280 {FUNC_CLASS_MATH, "mexp", 3,0, "a ** b mod m (integers)"},
281 {FUNC_CLASS_MATH, "min", 0,1, "Min of n numbers; null loses"},
282 {FUNC_CLASS_MATH, "mmul", 3,0, "a * b mod m (integers)"},
283 {FUNC_CLASS_MATH, "msub", 3,0, "a - b mod m (integers)"},
284 {FUNC_CLASS_MATH, "pow", 2,0, "Exponentiation; same as **."},
285 {FUNC_CLASS_MATH, "qnorm", 1,0, "Normal cumulative distribution function."},
286 {FUNC_CLASS_MATH, "round", 1,0, "Round to nearest integer."},
287 {FUNC_CLASS_MATH, "roundm", 2,0, "Round to nearest multiple of m: roundm($x,$m) is\nthe same as round($x/$m)*$m"},
288 {FUNC_CLASS_MATH, "sgn", 1,0, "+1 for positive input, 0 for zero input, -1 for\nnegative input."},
289 {FUNC_CLASS_MATH, "sin", 1,0, "Trigonometric sine."},
290 {FUNC_CLASS_MATH, "sinh", 1,0, "Hyperbolic sine."},
291 {FUNC_CLASS_MATH, "sqrt", 1,0, "Square root."},
292 {FUNC_CLASS_MATH, "tan", 1,0, "Trigonometric tangent."},
293 {FUNC_CLASS_MATH, "tanh", 1,0, "Hyperbolic tangent."},
294 {FUNC_CLASS_MATH, "urand", 0,0,
295 "Floating-point numbers uniformly distributed on the unit interval.\n"
296 "Int-valued example: '$n=floor(20+urand()*11)'." },
297 {FUNC_CLASS_MATH, "urandrange", 2,0,
298 "Floating-point numbers uniformly distributed on the interval [a, b)." },
299 {FUNC_CLASS_MATH, "urand32", 0,0, "Integer uniformly distributed 0 and 2**32-1\n"
300 "inclusive." },
301 {FUNC_CLASS_MATH, "urandint", 2,0, "Integer uniformly distributed between inclusive\ninteger endpoints." },
302
303 {FUNC_CLASS_TIME, "dhms2fsec", 1,0,
304 "Recovers floating-point seconds as in\n"
305 "dhms2fsec(\"5d18h53m20.250000s\") = 500000.250000"},
306 {FUNC_CLASS_TIME, "dhms2sec", 1,0, "Recovers integer seconds as in\ndhms2sec(\"5d18h53m20s\") = 500000"},
307 {FUNC_CLASS_TIME, "fsec2dhms", 1,0,
308 "Formats floating-point seconds as in\nfsec2dhms(500000.25) = \"5d18h53m20.250000s\""},
309 {FUNC_CLASS_TIME, "fsec2hms", 1,0,
310 "Formats floating-point seconds as in\nfsec2hms(5000.25) = \"01:23:20.250000\""},
311
312 {FUNC_CLASS_TIME, "gmt2sec", 1,0, "Parses GMT timestamp as integer seconds since\nthe epoch."},
313 {FUNC_CLASS_TIME, "localtime2sec", 1,0, "Parses local timestamp as integer seconds since\n"
314 "the epoch. Consults $TZ environment variable."},
315
316 {FUNC_CLASS_TIME, "hms2fsec", 1,0,
317 "Recovers floating-point seconds as in\nhms2fsec(\"01:23:20.250000\") = 5000.250000"},
318 {FUNC_CLASS_TIME, "hms2sec", 1,0, "Recovers integer seconds as in\nhms2sec(\"01:23:20\") = 5000"},
319 {FUNC_CLASS_TIME, "sec2dhms", 1,0, "Formats integer seconds as in sec2dhms(500000)\n= \"5d18h53m20s\""},
320
321 {FUNC_CLASS_TIME, "sec2gmt", 1,0,
322 "Formats seconds since epoch (integer part)\n"
323 "as GMT timestamp, e.g. sec2gmt(1440768801.7) = \"2015-08-28T13:33:21Z\".\n"
324 "Leaves non-numbers as-is."},
325 {FUNC_CLASS_TIME, "sec2gmt", 2,0,
326 "Formats seconds since epoch as GMT timestamp with n\n"
327 "decimal places for seconds, e.g. sec2gmt(1440768801.7,1) = \"2015-08-28T13:33:21.7Z\".\n"
328 "Leaves non-numbers as-is."},
329 {FUNC_CLASS_TIME, "sec2gmtdate", 1,0,
330 "Formats seconds since epoch (integer part)\n"
331 "as GMT timestamp with year-month-date, e.g. sec2gmtdate(1440768801.7) = \"2015-08-28\".\n"
332 "Leaves non-numbers as-is."},
333
334 {FUNC_CLASS_TIME, "sec2localtime", 1,0, "Formats seconds since epoch (integer part)\n"
335 "as local timestamp, e.g. sec2localtime(1440768801.7) = \"2015-08-28T13:33:21Z\".\n"
336 "Consults $TZ environment variable. Leaves non-numbers as-is."},
337 {FUNC_CLASS_TIME, "sec2localtime", 2,0,
338 "Formats seconds since epoch as local timestamp with n\n"
339 "decimal places for seconds, e.g. sec2localtime(1440768801.7,1) = \"2015-08-28T13:33:21.7Z\".\n"
340 "Consults $TZ environment variable. Leaves non-numbers as-is."},
341 {FUNC_CLASS_TIME, "sec2localdate", 1,0,
342 "Formats seconds since epoch (integer part)\n"
343 "as local timestamp with year-month-date, e.g. sec2localdate(1440768801.7) = \"2015-08-28\".\n"
344 "Consults $TZ environment variable. Leaves non-numbers as-is."},
345
346 {FUNC_CLASS_TIME, "sec2hms", 1,0,
347 "Formats integer seconds as in\n"
348 "sec2hms(5000) = \"01:23:20\""},
349 {FUNC_CLASS_TIME, "strftime", 2,0,
350 "Formats seconds since the epoch as timestamp, e.g.\n"
351 "strftime(1440768801.7,\"%Y-%m-%dT%H:%M:%SZ\") = \"2015-08-28T13:33:21Z\", and\n"
352 "strftime(1440768801.7,\"%Y-%m-%dT%H:%M:%3SZ\") = \"2015-08-28T13:33:21.700Z\".\n"
353 "Format strings are as in the C library (please see \"man strftime\" on your system),\n"
354 "with the Miller-specific addition of \"%1S\" through \"%9S\" which format the seconds\n"
355 "with 1 through 9 decimal places, respectively. (\"%S\" uses no decimal places.)\n"
356 "See also strftime_local."},
357 {FUNC_CLASS_TIME, "strftime_local", 2,0,
358 "Like strftime but consults the $TZ environment variable to get local time zone."},
359 {FUNC_CLASS_TIME, "strptime", 2,0,
360 "Parses timestamp as floating-point seconds since the epoch,\n"
361 "e.g. strptime(\"2015-08-28T13:33:21Z\",\"%Y-%m-%dT%H:%M:%SZ\") = 1440768801.000000,\n"
362 "and strptime(\"2015-08-28T13:33:21.345Z\",\"%Y-%m-%dT%H:%M:%SZ\") = 1440768801.345000.\n"
363 "See also strptime_local."},
364 {FUNC_CLASS_TIME, "strptime_local", 2,0,
365 "Like strptime, but consults $TZ environment variable to find and use local timezone."},
366 {FUNC_CLASS_TIME, "systime", 0,0,
367 "Floating-point seconds since the epoch,\n"
368 "e.g. 1440768801.748936." },
369
370 {FUNC_CLASS_TYPING, "is_absent", 1,0, "False if field is present in input, true otherwise"},
371 {FUNC_CLASS_TYPING, "is_bool", 1,0, "True if field is present with boolean value. Synonymous with is_boolean."},
372 {FUNC_CLASS_TYPING, "is_boolean", 1,0, "True if field is present with boolean value. Synonymous with is_bool."},
373 {FUNC_CLASS_TYPING, "is_empty", 1,0, "True if field is present in input with empty string value, false otherwise."},
374 {FUNC_CLASS_TYPING, "is_empty_map", 1,0, "True if argument is a map which is empty."},
375 {FUNC_CLASS_TYPING, "is_float", 1,0, "True if field is present with value inferred to be float"},
376 {FUNC_CLASS_TYPING, "is_int", 1,0, "True if field is present with value inferred to be int "},
377 {FUNC_CLASS_TYPING, "is_map", 1,0, "True if argument is a map."},
378 {FUNC_CLASS_TYPING, "is_nonempty_map", 1,0, "True if argument is a map which is non-empty."},
379 {FUNC_CLASS_TYPING, "is_not_empty", 1,0, "False if field is present in input with empty value, true otherwise"},
380 {FUNC_CLASS_TYPING, "is_not_map", 1,0, "True if argument is not a map."},
381 {FUNC_CLASS_TYPING, "is_not_null", 1,0, "False if argument is null (empty or absent), true otherwise."},
382 {FUNC_CLASS_TYPING, "is_null", 1,0, "True if argument is null (empty or absent), false otherwise."},
383 {FUNC_CLASS_TYPING, "is_numeric", 1,0, "True if field is present with value inferred to be int or float"},
384 {FUNC_CLASS_TYPING, "is_present", 1,0, "True if field is present in input, false otherwise."},
385 {FUNC_CLASS_TYPING, "is_string", 1,0, "True if field is present with string (including empty-string) value"},
386
387 {FUNC_CLASS_TYPING, "asserting_absent", 1,0, "Returns argument if it is absent in the input data, else\n"
388 "throws an error."},
389 {FUNC_CLASS_TYPING, "asserting_bool", 1,0, "Returns argument if it is present with boolean value, else\n"
390 "throws an error."},
391 {FUNC_CLASS_TYPING, "asserting_boolean", 1,0, "Returns argument if it is present with boolean value, else\n"
392 "throws an error."},
393 {FUNC_CLASS_TYPING, "asserting_empty", 1,0, "Returns argument if it is present in input with empty value,\n"
394 "else throws an error."},
395 {FUNC_CLASS_TYPING, "asserting_empty_map", 1,0, "Returns argument if it is a map with empty value, else\n"
396 "throws an error."},
397 {FUNC_CLASS_TYPING, "asserting_float", 1,0, "Returns argument if it is present with float value, else\n"
398 "throws an error."},
399 {FUNC_CLASS_TYPING, "asserting_int", 1,0, "Returns argument if it is present with int value, else\n"
400 "throws an error."},
401 {FUNC_CLASS_TYPING, "asserting_map", 1,0, "Returns argument if it is a map, else throws an error."},
402 {FUNC_CLASS_TYPING, "asserting_nonempty_map", 1,0, "Returns argument if it is a non-empty map, else throws\n"
403 "an error."},
404 {FUNC_CLASS_TYPING, "asserting_not_empty", 1,0, "Returns argument if it is present in input with non-empty\n"
405 "value, else throws an error."},
406 {FUNC_CLASS_TYPING, "asserting_not_map", 1,0, "Returns argument if it is not a map, else throws an error."},
407 {FUNC_CLASS_TYPING, "asserting_not_null", 1,0, "Returns argument if it is non-null (non-empty and non-absent),\n"
408 "else throws an error."},
409 {FUNC_CLASS_TYPING, "asserting_null", 1,0, "Returns argument if it is null (empty or absent), else throws\n"
410 "an error."},
411 {FUNC_CLASS_TYPING, "asserting_numeric", 1,0, "Returns argument if it is present with int or float value,\n"
412 "else throws an error."},
413 {FUNC_CLASS_TYPING, "asserting_present", 1,0, "Returns argument if it is present in input, else throws\n"
414 "an error."},
415 {FUNC_CLASS_TYPING, "asserting_string", 1,0, "Returns argument if it is present with string (including\n"
416 "empty-string) value, else throws an error."},
417
418 {FUNC_CLASS_CONVERSION, "boolean", 1,0, "Convert int/float/bool/string to boolean."},
419 {FUNC_CLASS_CONVERSION, "float", 1,0, "Convert int/float/bool/string to float."},
420 {FUNC_CLASS_CONVERSION, "fmtnum", 2,0,
421 "Convert int/float/bool to string using\n"
422 "printf-style format string, e.g. '$s = fmtnum($n, \"%06lld\")'. WARNING: Miller numbers\n"
423 "are all long long or double. If you use formats like %d or %f, behavior is undefined."},
424 {FUNC_CLASS_CONVERSION, "hexfmt", 1,0, "Convert int to string, e.g. 255 to \"0xff\"."},
425 {FUNC_CLASS_CONVERSION, "int", 1,0, "Convert int/float/bool/string to int."},
426 {FUNC_CLASS_CONVERSION, "string", 1,0, "Convert int/float/bool/string to string."},
427 {FUNC_CLASS_CONVERSION, "typeof", 1,0,
428 "Convert argument to type of argument (e.g.\n"
429 "MT_STRING). For debug."},
430
431 {FUNC_CLASS_MAPS, "depth", 1,0, "Prints maximum depth of hashmap: ''. Scalars have depth 0."},
432 {FUNC_CLASS_MAPS, "haskey", 2,0, "True/false if map has/hasn't key, e.g. 'haskey($*, \"a\")' or\n"
433 "'haskey(mymap, mykey)'. Error if 1st argument is not a map."},
434 {FUNC_CLASS_MAPS, "joink", 2,0, "Makes string from map keys. E.g. 'joink($*, \",\")'."},
435 {FUNC_CLASS_MAPS, "joinkv", 3,0, "Makes string from map key-value pairs. E.g. 'joinkv(@v[2], \"=\", \",\")'"},
436 {FUNC_CLASS_MAPS, "joinv", 2,0, "Makes string from map values. E.g. 'joinv(mymap, \",\")'."},
437 {FUNC_CLASS_MAPS, "leafcount", 1,0, "Counts total number of terminal values in hashmap. For single-level maps,\n"
438 "same as length."},
439 {FUNC_CLASS_MAPS, "length", 1,0, "Counts number of top-level entries in hashmap. Scalars have length 1."},
440 {FUNC_CLASS_MAPS, "mapdiff", 0,1, "With 0 args, returns empty map. With 1 arg, returns copy of arg.\n"
441 "With 2 or more, returns copy of arg 1 with all keys from any of remaining argument maps removed."},
442 {FUNC_CLASS_MAPS, "mapexcept", 1,1, "Returns a map with keys from remaining arguments, if any, unset.\n"
443 "E.g. 'mapexcept({1:2,3:4,5:6}, 1, 5, 7)' is '{3:4}'."},
444 {FUNC_CLASS_MAPS, "mapselect", 1,1, "Returns a map with only keys from remaining arguments set.\n"
445 "E.g. 'mapselect({1:2,3:4,5:6}, 1, 5, 7)' is '{1:2,5:6}'."},
446 {FUNC_CLASS_MAPS, "mapsum", 0,1, "With 0 args, returns empty map. With >= 1 arg, returns a map with\n"
447 "key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'."},
448 {FUNC_CLASS_MAPS, "splitkv", 3,0, "Splits string by separators into map with type inference.\n"
449 "E.g. 'splitkv(\"a=1,b=2,c=3\", \"=\", \",\")' gives '{\"a\" : 1, \"b\" : 2, \"c\" : 3}'."},
450 {FUNC_CLASS_MAPS, "splitkvx", 3,0, "Splits string by separators into map without type inference (keys and\n"
451 "values are strings). E.g. 'splitkv(\"a=1,b=2,c=3\", \"=\", \",\")' gives\n"
452 "'{\"a\" : \"1\", \"b\" : \"2\", \"c\" : \"3\"}'."},
453 {FUNC_CLASS_MAPS, "splitnv", 2,0, "Splits string by separator into integer-indexed map with type inference.\n"
454 "E.g. 'splitnv(\"a,b,c\" , \",\")' gives '{1 : \"a\", 2 : \"b\", 3 : \"c\"}'."},
455 {FUNC_CLASS_MAPS, "splitnvx", 2,0, "Splits string by separator into integer-indexed map without type\n"
456 "inference (values are strings). E.g. 'splitnv(\"4,5,6\" , \",\")' gives '{1 : \"4\", 2 : \"5\", 3 : \"6\"}'."},
457
458 {0, NULL, -1 , -1, NULL}, // table terminator
459 };
460
461 // ----------------------------------------------------------------
check_arity(function_lookup_t lookup_table[],char * function_name,int user_provided_arity,int * parity,int * pvariadic)462 static arity_check_t check_arity(function_lookup_t lookup_table[], char* function_name,
463 int user_provided_arity, int *parity, int* pvariadic)
464 {
465 *parity = -1;
466 *pvariadic = FALSE;
467 int found_function_name = FALSE;
468 for (int i = 0; ; i++) {
469 function_lookup_t* plookup = &lookup_table[i];
470 if (plookup->function_name == NULL)
471 break;
472 if (streq(function_name, plookup->function_name)) {
473 found_function_name = TRUE;
474 *parity = plookup->arity;
475 if (plookup->variadic) {
476 *pvariadic = TRUE;
477 if (user_provided_arity < plookup->arity) {
478 return ARITY_CHECK_FAIL;
479 }
480 return ARITY_CHECK_PASS;
481 }
482 if (user_provided_arity == plookup->arity) {
483 return ARITY_CHECK_PASS;
484 }
485 }
486 }
487 if (found_function_name) {
488 return ARITY_CHECK_FAIL;
489 } else {
490 return ARITY_CHECK_NO_SUCH;
491 }
492 }
493
// Verifies that the built-in function function_name may be called with
// user_provided_arity arguments. On success it returns normally, with
// *pvariadic telling the caller whether the function is variadic. If the name
// is unknown or the argument count is unacceptable, it prints a diagnostic to
// stderr and exits the process with status 1.
static void fmgr_check_arity_with_report(fmgr_t* pfmgr, char* function_name,
	int user_provided_arity, int* pvariadic)
{
	int arity = -1;
	arity_check_t result = check_arity(pfmgr->function_lookup_table, function_name,
		user_provided_arity, &arity, pvariadic);

	if (result == ARITY_CHECK_NO_SUCH) {
		fprintf(stderr, "%s: Function name \"%s\" not found.\n", MLR_GLOBALS.bargv0, function_name);
		exit(1);
	}

	if (result != ARITY_CHECK_FAIL)
		return;

	// The table does not record the full set of arities per function name, so
	// the few names given the "one or two" wording are special-cased here
	// instead of being derived from the table.
	int takes_one_or_two = streq(function_name, "-")
		|| streq(function_name, "sec2gmt")
		|| streq(function_name, "sec2localtime");
	char* plural_suffix = (arity == 1) ? "" : "s";

	if (takes_one_or_two) {
		fprintf(stderr, "%s: Function named \"%s\" takes one argument or two; got %d.\n",
			MLR_GLOBALS.bargv0, function_name, user_provided_arity);
	} else if (*pvariadic) {
		fprintf(stderr, "%s: Function named \"%s\" takes at least %d argument%s; got %d.\n",
			MLR_GLOBALS.bargv0, function_name, arity, plural_suffix, user_provided_arity);
	} else {
		fprintf(stderr, "%s: Function named \"%s\" takes %d argument%s; got %d.\n",
			MLR_GLOBALS.bargv0, function_name, arity, plural_suffix, user_provided_arity);
	}
	exit(1);
}
521
// Maps a function class to the lowercase name shown in help output. Values
// outside the enumeration yield "???". The returned string is a literal and
// must not be freed or modified.
static char* function_class_to_desc(func_class_t function_class) {
	char* desc = "???";
	switch (function_class) {
	case FUNC_CLASS_ARITHMETIC: desc = "arithmetic"; break;
	case FUNC_CLASS_MATH:       desc = "math";       break;
	case FUNC_CLASS_BOOLEAN:    desc = "boolean";    break;
	case FUNC_CLASS_STRING:     desc = "string";     break;
	case FUNC_CLASS_CONVERSION: desc = "conversion"; break;
	case FUNC_CLASS_TYPING:     desc = "typing";     break;
	case FUNC_CLASS_MAPS:       desc = "maps";       break;
	case FUNC_CLASS_TIME:       desc = "time";       break;
	default:                                         break;
	}
	return desc;
}
535
// Writes all built-in function names to output_stream, separator-delimited
// and wrapped so that lines stay under 80 columns. Every output line begins
// with leader. Names appear in table order, so a name with several arities is
// listed once per table row. pfmgr is not consulted.
void fmgr_list_functions(fmgr_t* pfmgr, FILE* output_stream, char* leader) {
	char* separator = " ";
	int leaderlen = strlen(leader);
	int separatorlen = strlen(separator);

	int column = leaderlen;   // width the current line would have after this name
	int at_line_start = TRUE; // the leader has not yet been written on this line

	for (function_lookup_t* prow = FUNCTION_LOOKUP_TABLE; prow->function_name != NULL; prow++) {
		char* fname = prow->function_name;
		int fnamelen = strlen(fname);

		column += separatorlen + fnamelen;
		if (column >= 80) {
			// This name would reach column 80: break the line and account
			// for the name on the fresh line instead.
			fprintf(output_stream, "\n");
			column = leaderlen + separatorlen + fnamelen;
			at_line_start = TRUE;
		}

		if (at_line_start) {
			fprintf(output_stream, "%s", leader);
			at_line_start = FALSE;
		}
		fprintf(output_stream, "%s%s", separator, fname);
	}
	fprintf(output_stream, "\n");
}
563
564 // Pass function_name == NULL to get usage for all functions.
fmgr_function_usage(fmgr_t * pfmgr,FILE * output_stream,char * function_name)565 void fmgr_function_usage(fmgr_t* pfmgr, FILE* output_stream, char* function_name) {
566 int found = FALSE;
567 char* nfmt = "%s (class=%s #args=%d): %s\n";
568 char* vfmt = "%s (class=%s variadic): %s\n";
569
570 int num_printed = 0; // > 1 matches e.g. for - and sec2gmt
571 for (int i = 0; ; i++) {
572 function_lookup_t* plookup = &FUNCTION_LOOKUP_TABLE[i];
573 if (plookup->function_name == NULL) // end of table
574 break;
575 if (function_name == NULL || streq(function_name, plookup->function_name)) {
576 if (++num_printed > 1)
577 fprintf(output_stream, "\n");
578 if (plookup->variadic) {
579 fprintf(output_stream, vfmt, plookup->function_name,
580 function_class_to_desc(plookup->function_class),
581 plookup->usage_string);
582 } else {
583 fprintf(output_stream, nfmt, plookup->function_name,
584 function_class_to_desc(plookup->function_class),
585 plookup->arity, plookup->usage_string);
586 }
587 found = TRUE;
588 }
589 if (function_name == NULL)
590 fprintf(output_stream, "\n");
591 }
592 if (!found)
593 fprintf(output_stream, "%s: no such function.\n", function_name);
594 if (function_name == NULL) {
595 fprintf(output_stream, "To set the seed for urand, you may specify decimal or hexadecimal 32-bit\n");
596 fprintf(output_stream, "numbers of the form \"%s --seed 123456789\" or \"%s --seed 0xcafefeed\".\n",
597 MLR_GLOBALS.bargv0, MLR_GLOBALS.bargv0);
598 fprintf(output_stream, "Miller's built-in variables are NF, NR, FNR, FILENUM, and FILENAME (awk-like)\n");
599 fprintf(output_stream, "along with the mathematical constants M_PI and M_E.\n");
600 }
601 }
602
// Writes the name of every built-in function to output_stream, one name per
// line with no other decoration. The pfmgr argument is not consulted.
void fmgr_list_all_functions_raw(fmgr_t* pfmgr, FILE* output_stream) {
	for (int i = 0; ; i++) {
		function_lookup_t* plookup = &FUNCTION_LOOKUP_TABLE[i];
		if (plookup->function_name == NULL) // end of table
			break;
		// Write to the caller-supplied stream rather than unconditionally to stdout.
		fprintf(output_stream, "%s\n", plookup->function_name);
	}
}
611
// Writes a three-column table (name, class, argument count) of all built-in
// functions to output_stream, preceded by a header row. Variadic functions
// show "variadic" in place of a number. The pfmgr argument is not consulted.
void fmgr_list_all_functions_as_table(fmgr_t* pfmgr, FILE* output_stream) {
	fprintf(output_stream, "%-30s %-10s %s\n", "Name", "Class", "#Args");

	// The lookup table is terminated by an entry with a null function name.
	function_lookup_t* plookup = FUNCTION_LOOKUP_TABLE;
	while (plookup->function_name != NULL) {
		fprintf(output_stream, "%-30s %-10s ",
			plookup->function_name,
			function_class_to_desc(plookup->function_class));

		if (!plookup->variadic)
			fprintf(output_stream, "%d", plookup->arity);
		else
			fprintf(output_stream, "variadic");

		fprintf(output_stream, "\n");
		plookup++;
	}
}
630
631 // ================================================================
632 typedef struct _udf_callsite_state_t {
633 int arity;
634 rxval_evaluator_t** pevals;
635 boxed_xval_t* args;
636 udf_defsite_state_t* pdefsite_state;
637 } udf_callsite_state_t;
638
639 // ----------------------------------------------------------------
udf_callsite_state_alloc(fmgr_t * pfmgr,udf_defsite_state_t * pdefsite_state,mlr_dsl_ast_node_t * pnode,int arity,int type_inferencing,int context_flags)640 static udf_callsite_state_t* udf_callsite_state_alloc(
641 fmgr_t* pfmgr,
642 udf_defsite_state_t* pdefsite_state,
643 mlr_dsl_ast_node_t* pnode,
644 int arity,
645 int type_inferencing,
646 int context_flags)
647 {
648 udf_callsite_state_t* pstate = mlr_malloc_or_die(sizeof(udf_callsite_state_t));
649
650 pstate->arity = pnode->pchildren->length;
651
652 pstate->pevals = mlr_malloc_or_die(pstate->arity * sizeof(rxval_evaluator_t*));
653 int i = 0;
654 for (sllve_t* pe = pnode->pchildren->phead; pe != NULL; pe = pe->pnext, i++) {
655 mlr_dsl_ast_node_t* parg_node = pe->pvvalue;
656 pstate->pevals[i] = rxval_evaluator_alloc_from_ast(parg_node,
657 pfmgr, type_inferencing, context_flags);
658 }
659
660 pstate->args = mlr_malloc_or_die(pstate->arity * sizeof(boxed_xval_t));
661 for (i = 0; i < pstate->arity; i++) {
662 // Ownership will be transferred to local-stack which will be responsible for freeing.
663 pstate->args[i] = box_ephemeral_val(mv_absent());
664 }
665
666 pstate->pdefsite_state = pdefsite_state;
667
668 return pstate;
669 }
670
671 // ----------------------------------------------------------------
// Evaluates every argument expression of the call site, first to last, and
// stores each result in the corresponding slot of pstate->args (overwriting
// whatever was there).
static void udf_callsite_state_eval_args(udf_callsite_state_t* pstate, variables_t* pvars) {
	int nargs = pstate->arity;
	int k = 0;
	while (k < nargs) {
		pstate->args[k] = pstate->pevals[k]->pprocess_func(pstate->pevals[k]->pvstate, pvars);
		k++;
	}
}
677
678 // ----------------------------------------------------------------
// Releases a UDF call-site state: each argument evaluator is freed through its
// own free function, then the evaluator array, the argument-value array, and
// the state itself. The definition-site state is not freed here.
static void udf_callsite_state_free(udf_callsite_state_t* pstate) {
	rxval_evaluator_t** ppeval = pstate->pevals;
	for (int remaining = pstate->arity; remaining > 0; remaining--, ppeval++) {
		rxval_evaluator_t* parg_evaluator = *ppeval;
		parg_evaluator->pfree_func(parg_evaluator);
	}

	free(pstate->pevals);
	free(pstate->args);
	free(pstate);
}
688
689 // ----------------------------------------------------------------
rval_evaluator_udf_callsite_process(void * pvstate,variables_t * pvars)690 static mv_t rval_evaluator_udf_callsite_process(void* pvstate, variables_t* pvars) {
691 udf_callsite_state_t* pstate = pvstate;
692
693 udf_callsite_state_eval_args(pstate, pvars);
694
695 // Functions returning map values in a scalar context get their return values treated as
696 // absent-null. (E.g. f() returns a map and g() returns an int and the statement is '$x
697 // = f() + g()'.) Non-scalar-context return values are handled separately (not here).
698 boxed_xval_t retval = pstate->pdefsite_state->pprocess_func(
699 pstate->pdefsite_state->pvstate, pstate->arity, pstate->args, pvars);
700
701 if (retval.xval.is_terminal) {
702 return retval.xval.terminal_mlrval;
703 } else {
704 if (retval.is_ephemeral) {
705 mlhmmv_xvalue_free(&retval.xval);
706 }
707 return mv_absent();
708 }
709 }
710
rxval_evaluator_udf_xcallsite_process(void * pvstate,variables_t * pvars)711 static boxed_xval_t rxval_evaluator_udf_xcallsite_process(void* pvstate, variables_t* pvars) {
712 udf_callsite_state_t* pstate = pvstate;
713 udf_callsite_state_eval_args(pstate, pvars);
714 return pstate->pdefsite_state->pprocess_func(
715 pstate->pdefsite_state->pvstate, pstate->arity, pstate->args, pvars);
716 }
717
// Free function for scalar UDF call-site evaluators: releases the call-site
// state held in pvstate, then the evaluator itself.
static void rval_evaluator_udf_callsite_free(rval_evaluator_t* pevaluator) {
	udf_callsite_state_free(pevaluator->pvstate);
	free(pevaluator);
}
723
// Free function for map-capable UDF call-site evaluators: releases the
// call-site state held in pvstate, then the evaluator itself.
static void rxval_evaluator_udf_xcallsite_free(rxval_evaluator_t* pxevaluator) {
	udf_callsite_state_free(pxevaluator->pvstate);
	free(pxevaluator);
}
729
fmgr_alloc_from_udf_callsite(fmgr_t * pfmgr,udf_defsite_state_t * pdefsite_state,mlr_dsl_ast_node_t * pnode,char * function_name,int arity,int type_inferencing,int context_flags)730 static rval_evaluator_t* fmgr_alloc_from_udf_callsite(fmgr_t* pfmgr, udf_defsite_state_t* pdefsite_state,
731 mlr_dsl_ast_node_t* pnode, char* function_name, int arity, int type_inferencing, int context_flags)
732 {
733 rval_evaluator_t* pudf_callsite_evaluator = mlr_malloc_or_die(sizeof(rval_evaluator_t));
734
735 udf_callsite_state_t* pstate = udf_callsite_state_alloc(pfmgr, pdefsite_state, pnode,
736 arity, type_inferencing, context_flags);
737
738 pudf_callsite_evaluator->pvstate = pstate;
739 pudf_callsite_evaluator->pprocess_func = rval_evaluator_udf_callsite_process;
740 pudf_callsite_evaluator->pfree_func = rval_evaluator_udf_callsite_free;
741
742 return pudf_callsite_evaluator;
743 }
744
fmgr_alloc_from_udf_xcallsite(fmgr_t * pfmgr,udf_defsite_state_t * pdefsite_state,mlr_dsl_ast_node_t * pnode,char * function_name,int arity,int type_inferencing,int context_flags)745 static rxval_evaluator_t* fmgr_alloc_from_udf_xcallsite(fmgr_t* pfmgr, udf_defsite_state_t* pdefsite_state,
746 mlr_dsl_ast_node_t* pnode, char* function_name, int arity, int type_inferencing, int context_flags)
747 {
748 rxval_evaluator_t* pudf_xcallsite_evaluator = mlr_malloc_or_die(sizeof(rval_evaluator_t));
749
750 udf_callsite_state_t* pstate = udf_callsite_state_alloc(pfmgr, pdefsite_state, pnode,
751 arity, type_inferencing, context_flags);
752
753 pudf_xcallsite_evaluator->pvstate = pstate;
754 pudf_xcallsite_evaluator->pprocess_func = rxval_evaluator_udf_xcallsite_process;
755 pudf_xcallsite_evaluator->pfree_func = rxval_evaluator_udf_xcallsite_free;
756
757 return pudf_xcallsite_evaluator;
758 }
759
760 // ================================================================
761 typedef struct _unresolved_func_callsite_state_t {
762 char* function_name;
763 int arity;
764 int type_inferencing;
765 int context_flags;
766 mlr_dsl_ast_node_t* pnode;
767 } unresolved_func_callsite_state_t;
768
unresolved_callsite_alloc(char * function_name,int arity,int type_inferencing,int context_flags,mlr_dsl_ast_node_t * pnode)769 static unresolved_func_callsite_state_t* unresolved_callsite_alloc(char* function_name, int arity,
770 int type_inferencing, int context_flags, mlr_dsl_ast_node_t* pnode)
771 {
772 unresolved_func_callsite_state_t* pstate = mlr_malloc_or_die(sizeof(unresolved_func_callsite_state_t));
773 pstate->function_name = mlr_strdup_or_die(function_name);
774 pstate->arity = arity;
775 pstate->type_inferencing = type_inferencing;
776 pstate->context_flags = context_flags;
777 pstate->pnode = pnode;
778 return pstate;
779 }
780
unresolved_callsite_free(unresolved_func_callsite_state_t * pstate)781 static void unresolved_callsite_free(unresolved_func_callsite_state_t* pstate) {
782 if (pstate == NULL)
783 return;
784 free(pstate->function_name);
785 free(pstate);
786 }
787
788 // ----------------------------------------------------------------
provisional_call_func(void * pvstate,variables_t * pvars)789 static mv_t provisional_call_func(void* pvstate, variables_t* pvars) {
790 unresolved_func_callsite_state_t* pstate = pvstate;
791 fprintf(stderr,
792 "%s: internal coding error: unresolved scalar-return-value callsite \"%s\".\n",
793 MLR_GLOBALS.bargv0, pstate->function_name);
794 exit(1);
795 }
796
// Free function for provisional scalar-returning evaluators: releases the
// unresolved-callsite record held in pvstate, then the evaluator itself.
static void provisional_call_free(rval_evaluator_t* pevaluator) {
	unresolved_callsite_free(pevaluator->pvstate);
	free(pevaluator);
}
802
fmgr_alloc_provisional_from_operator_or_function_call(fmgr_t * pfmgr,mlr_dsl_ast_node_t * pnode,int type_inferencing,int context_flags)803 rval_evaluator_t* fmgr_alloc_provisional_from_operator_or_function_call(fmgr_t* pfmgr, mlr_dsl_ast_node_t* pnode,
804 int type_inferencing, int context_flags)
805 {
806 char* function_name = pnode->text;
807 int user_provided_arity = pnode->pchildren->length;
808
809 unresolved_func_callsite_state_t* pstate = unresolved_callsite_alloc(function_name, user_provided_arity,
810 type_inferencing, context_flags, pnode);
811
812 rval_evaluator_t* pev = mlr_malloc_or_die(sizeof(rval_evaluator_t));
813 pev->pvstate = pstate;
814 pev->pprocess_func = provisional_call_func;
815 pev->pfree_func = provisional_call_free;
816
817 // Remember this callsite to a function which may or may not have been defined yet.
818 // Then later we can resolve them to point to UDF bodies which have been defined.
819 fmgr_mark_callsite_to_resolve(pfmgr, pev);
820
821 return pev;
822 }
823
824 // ----------------------------------------------------------------
provisional_xcall_func(void * pvstate,variables_t * pvars)825 static boxed_xval_t provisional_xcall_func(void* pvstate, variables_t* pvars) {
826 unresolved_func_callsite_state_t* pstate = pvstate;
827 fprintf(stderr,
828 "%s: internal coding error: unresolved map-return-value callsite \"%s\".\n",
829 MLR_GLOBALS.bargv0, pstate->function_name);
830 exit(1);
831 }
832
// Free function for provisional map-capable evaluators: releases the
// unresolved-callsite record held in pvstate, then the evaluator itself.
static void provisional_xcall_free(rxval_evaluator_t* pxevaluator) {
	unresolved_callsite_free(pxevaluator->pvstate);
	free(pxevaluator);
}
838
fmgr_xalloc_provisional_from_operator_or_function_call(fmgr_t * pfmgr,mlr_dsl_ast_node_t * pnode,int type_inferencing,int context_flags)839 rxval_evaluator_t* fmgr_xalloc_provisional_from_operator_or_function_call(fmgr_t* pfmgr, mlr_dsl_ast_node_t* pnode,
840 int type_inferencing, int context_flags)
841 {
842 char* function_name = pnode->text;
843 int user_provided_arity = pnode->pchildren->length;
844
845 unresolved_func_callsite_state_t* pstate = unresolved_callsite_alloc(function_name, user_provided_arity,
846 type_inferencing, context_flags, pnode);
847
848 rxval_evaluator_t* pxev = mlr_malloc_or_die(sizeof(rxval_evaluator_t));
849 pxev->pvstate = pstate;
850 pxev->pprocess_func = provisional_xcall_func;
851 pxev->pfree_func = provisional_xcall_free;
852
853 // Remember this callsite to a function which may or may not have been defined yet.
854 // Then later we can resolve them to point to UDF bodies which have been defined.
855 fmgr_mark_xcallsite_to_resolve(pfmgr, pxev);
856
857 return pxev;
858 }
859
860 // ----------------------------------------------------------------
// Queues a scalar-returning evaluator on the function manager's list of
// call sites awaiting resolution.
void fmgr_mark_callsite_to_resolve(fmgr_t* pfmgr, rval_evaluator_t* pev) {
	void* pvpending = pev;
	sllv_append(pfmgr->pfunc_callsite_evaluators_to_resolve, pvpending);
}
864
// Queues a map-capable evaluator on the function manager's list of call sites
// awaiting resolution.
void fmgr_mark_xcallsite_to_resolve(fmgr_t* pfmgr, rxval_evaluator_t* pxev) {
	void* pvpending = pxev;
	sllv_append(pfmgr->pfunc_callsite_xevaluators_to_resolve, pvpending);
}
868
869 // ----------------------------------------------------------------
870 // Resolving a callsite involves treewalking the AST which may find more callsites to
871 // resolve. E.g. in '$y = f(g($x))', f is initially unresolved (f and/or g perhaps as yet
872 // undefined as of when the callsite is parsed), then at resolution time for f, its
873 // argument 'g($x)' is encountered, initially unresolved, then resolved.
874 // Hence the outer loop.
fmgr_resolve_func_callsites(fmgr_t * pfmgr)875 void fmgr_resolve_func_callsites(fmgr_t* pfmgr) {
876 while (TRUE) {
877 int did = FALSE;
878 while (pfmgr->pfunc_callsite_xevaluators_to_resolve->phead != NULL) {
879 did = TRUE;
880 rxval_evaluator_t* pxev = sllv_pop(pfmgr->pfunc_callsite_xevaluators_to_resolve);
881 unresolved_func_callsite_state_t* ptemp_state = pxev->pvstate;
882 resolve_func_xcallsite(pfmgr, pxev);
883 unresolved_callsite_free(ptemp_state);
884 }
885
886 while (pfmgr->pfunc_callsite_evaluators_to_resolve->phead != NULL) {
887 did = TRUE;
888 rval_evaluator_t* pev = sllv_pop(pfmgr->pfunc_callsite_evaluators_to_resolve);
889 unresolved_func_callsite_state_t* ptemp_state = pev->pvstate;
890 resolve_func_callsite(pfmgr, pev);
891 unresolved_callsite_free(ptemp_state);
892 }
893 if (!did) {
894 break;
895 }
896 }
897 }
898
899 // ----------------------------------------------------------------
construct_udf_callsite_evaluator(fmgr_t * pfmgr,unresolved_func_callsite_state_t * pcallsite)900 static rval_evaluator_t* construct_udf_callsite_evaluator(
901 fmgr_t* pfmgr,
902 unresolved_func_callsite_state_t* pcallsite)
903 {
904 char* function_name = pcallsite->function_name;
905 int user_provided_arity = pcallsite->arity;
906 int type_inferencing = pcallsite->type_inferencing;
907 int context_flags = pcallsite->context_flags;
908 mlr_dsl_ast_node_t* pnode = pcallsite->pnode;
909
910 udf_defsite_state_t* pudf_defsite_state = lhmsv_get(pfmgr->pudf_names_to_defsite_states,
911 pcallsite->function_name);
912
913 if (pudf_defsite_state != NULL) {
914 int udf_arity = pudf_defsite_state->arity;
915 if (user_provided_arity != udf_arity) {
916 fprintf(stderr, "Function named \"%s\" takes %d argument%s; got %d.\n",
917 function_name, udf_arity, (udf_arity == 1) ? "" : "s", user_provided_arity);
918 exit(1);
919 }
920
921 return fmgr_alloc_from_udf_callsite(pfmgr, pudf_defsite_state,
922 pnode, function_name, user_provided_arity, type_inferencing, context_flags);
923 } else {
924 return NULL;
925 }
926 }
927
construct_udf_defsite_xevaluator(fmgr_t * pfmgr,unresolved_func_callsite_state_t * pcallsite)928 static rxval_evaluator_t* construct_udf_defsite_xevaluator(
929 fmgr_t* pfmgr,
930 unresolved_func_callsite_state_t* pcallsite)
931 {
932 char* function_name = pcallsite->function_name;
933 int user_provided_arity = pcallsite->arity;
934 int type_inferencing = pcallsite->type_inferencing;
935 int context_flags = pcallsite->context_flags;
936 mlr_dsl_ast_node_t* pnode = pcallsite->pnode;
937
938 udf_defsite_state_t* pudf_defsite_state = lhmsv_get(pfmgr->pudf_names_to_defsite_states,
939 pcallsite->function_name);
940
941 if (pudf_defsite_state != NULL) {
942 int udf_arity = pudf_defsite_state->arity;
943 if (user_provided_arity != udf_arity) {
944 fprintf(stderr, "Function named \"%s\" takes %d argument%s; got %d.\n",
945 function_name, udf_arity, (udf_arity == 1) ? "" : "s", user_provided_arity);
946 exit(1);
947 }
948
949 return fmgr_alloc_from_udf_xcallsite(pfmgr, pudf_defsite_state,
950 pnode, function_name, user_provided_arity, type_inferencing, context_flags);
951 } else {
952 return NULL;
953 }
954 }
955
956 // ----------------------------------------------------------------
construct_builtin_function_callsite_evaluator(fmgr_t * pfmgr,unresolved_func_callsite_state_t * pcallsite)957 static rval_evaluator_t* construct_builtin_function_callsite_evaluator(
958 fmgr_t* pfmgr,
959 unresolved_func_callsite_state_t* pcallsite)
960 {
961 char* function_name = pcallsite->function_name;
962 int user_provided_arity = pcallsite->arity;
963 int type_inferencing = pcallsite->type_inferencing;
964 int context_flags = pcallsite->context_flags;
965 mlr_dsl_ast_node_t* pnode = pcallsite->pnode;
966
967 int variadic = FALSE;
968 fmgr_check_arity_with_report(pfmgr, function_name, user_provided_arity, &variadic);
969
970 rval_evaluator_t* pevaluator = NULL;
971 if (variadic) {
972 int nargs = pnode->pchildren->length;
973 rval_evaluator_t** pargs = mlr_malloc_or_die(nargs * sizeof(rval_evaluator_t*));
974 int i = 0;
975 for (sllve_t* pe = pnode->pchildren->phead; pe != NULL; pe = pe->pnext, i++) {
976 mlr_dsl_ast_node_t* pchild = pe->pvvalue;
977 pargs[i] = rval_evaluator_alloc_from_ast(pchild, pfmgr, type_inferencing, context_flags);
978 }
979 pevaluator = fmgr_alloc_evaluator_from_variadic_func_name(function_name, pargs, nargs);
980
981 } else if (user_provided_arity == 0) {
982 pevaluator = fmgr_alloc_evaluator_from_zary_func_name(function_name);
983 } else if (user_provided_arity == 1) {
984 mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
985 rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
986 pevaluator = fmgr_alloc_evaluator_from_unary_func_name(function_name, parg1);
987 } else if (user_provided_arity == 2) {
988 mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
989 mlr_dsl_ast_node_t* parg2_node = pnode->pchildren->phead->pnext->pvvalue;
990 int type2 = parg2_node->type;
991
992 int is_regexy =
993 streq(function_name, "=~") ||
994 streq(function_name, "!=~") ||
995 streq(function_name, "regextract");
996
997 if (is_regexy && type2 == MD_AST_NODE_TYPE_STRING_LITERAL) {
998 rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
999 pevaluator = fmgr_alloc_evaluator_from_binary_regex_arg2_func_name(function_name,
1000 parg1, parg2_node->text, FALSE);
1001 } else if (is_regexy && type2 == MD_AST_NODE_TYPE_REGEXI) {
1002 rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1003 pevaluator = fmgr_alloc_evaluator_from_binary_regex_arg2_func_name(function_name, parg1, parg2_node->text,
1004 TYPE_INFER_STRING_FLOAT_INT);
1005 } else {
1006 // regexes can still be applied here, e.g. if the 2nd argument is a non-terminal AST: however
1007 // the regexes will be compiled record-by-record rather than once at alloc time, which will
1008 // be slower.
1009 rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1010 rval_evaluator_t* parg2 = rval_evaluator_alloc_from_ast(parg2_node, pfmgr, type_inferencing, context_flags);
1011 pevaluator = fmgr_alloc_evaluator_from_binary_func_name(function_name, parg1, parg2);
1012 }
1013
1014 } else if (user_provided_arity == 3) {
1015 mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
1016 mlr_dsl_ast_node_t* parg2_node = pnode->pchildren->phead->pnext->pvvalue;
1017 mlr_dsl_ast_node_t* parg3_node = pnode->pchildren->phead->pnext->pnext->pvvalue;
1018 int type2 = parg2_node->type;
1019
1020 int is_regexy =
1021 streq(function_name, "sub") ||
1022 streq(function_name, "gsub") ||
1023 streq(function_name, "regextract_or_else");
1024
1025 if (is_regexy && type2 == MD_AST_NODE_TYPE_STRING_LITERAL) {
1026 // sub/gsub-regex special case:
1027 rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1028 rval_evaluator_t* parg3 = rval_evaluator_alloc_from_ast(parg3_node, pfmgr, type_inferencing, context_flags);
1029 pevaluator = fmgr_alloc_evaluator_from_ternary_regex_arg2_func_name(function_name, parg1, parg2_node->text,
1030 FALSE, parg3);
1031
1032 } else if (is_regexy && type2 == MD_AST_NODE_TYPE_REGEXI) {
1033 // sub/gsub-regex special case:
1034 rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1035 rval_evaluator_t* parg3 = rval_evaluator_alloc_from_ast(parg3_node, pfmgr, type_inferencing, context_flags);
1036 pevaluator = fmgr_alloc_evaluator_from_ternary_regex_arg2_func_name(function_name, parg1, parg2_node->text,
1037 TYPE_INFER_STRING_FLOAT_INT, parg3);
1038
1039 } else {
1040 // regexes can still be applied here, e.g. if the 2nd argument is a non-terminal AST: however
1041 // the regexes will be compiled record-by-record rather than once at alloc time, which will
1042 // be slower.
1043 rval_evaluator_t* parg1 = rval_evaluator_alloc_from_ast(parg1_node, pfmgr, type_inferencing, context_flags);
1044 rval_evaluator_t* parg2 = rval_evaluator_alloc_from_ast(parg2_node, pfmgr, type_inferencing, context_flags);
1045 rval_evaluator_t* parg3 = rval_evaluator_alloc_from_ast(parg3_node, pfmgr, type_inferencing, context_flags);
1046 pevaluator = fmgr_alloc_evaluator_from_ternary_func_name(function_name, parg1, parg2, parg3);
1047 }
1048
1049 } else {
1050 fprintf(stderr, "Miller: internal coding error: arity for function name \"%s\" misdetected.\n",
1051 function_name);
1052 exit(1);
1053 }
1054
1055 return pevaluator;
1056 }
1057
1058 // ----------------------------------------------------------------
1059 // At callsites, arguments can be scalars or maps; return values can be scalars
1060 // or maps. At the user level, a function take map input and produce scalar
1061 // output or vice versa. As of this writing, though, *internally* functions
1062 // go from scalars to scalar or maps to map. This wrapper wraps scalar input
1063 // to functions which know about maps.
1064
1065 typedef struct _xeval_wrapping_eval_state_t {
1066 rval_evaluator_t* pevaluator;
1067 } xeval_wrapping_eval_state_t;
1068
xeval_wrapping_eval_func(void * pvstate,variables_t * pvars)1069 static boxed_xval_t xeval_wrapping_eval_func(void* pvstate, variables_t* pvars) {
1070 xeval_wrapping_eval_state_t* pstate = pvstate;
1071 rval_evaluator_t* pevaluator = pstate->pevaluator;
1072 mv_t val = pevaluator->pprocess_func(pevaluator->pvstate, pvars);
1073 return (boxed_xval_t) {
1074 .xval = mlhmmv_xvalue_wrap_terminal(val),
1075 .is_ephemeral = TRUE, // xxx verify reference semantics for RHS evaluators!
1076 };
1077 }
1078
// Free function for the scalar-to-xval wrapper: frees the wrapped scalar
// evaluator through its own free function, then the wrapper state, then the
// wrapper evaluator itself.
static void xeval_wrapping_eval_free(rxval_evaluator_t* pxevaluator) {
	xeval_wrapping_eval_state_t* pwrapper = pxevaluator->pvstate;
	rval_evaluator_t* pwrapped = pwrapper->pevaluator;

	pwrapped->pfree_func(pwrapped);
	free(pwrapper);
	free(pxevaluator);
}
1085
fmgr_alloc_xeval_wrapping_eval(rval_evaluator_t * pevaluator)1086 static rxval_evaluator_t* fmgr_alloc_xeval_wrapping_eval(rval_evaluator_t* pevaluator) {
1087 rxval_evaluator_t* pxevaluator = mlr_malloc_or_die(sizeof(rxval_evaluator_t));
1088
1089 xeval_wrapping_eval_state_t* pstate = mlr_malloc_or_die(sizeof(xeval_wrapping_eval_state_t));
1090 pstate->pevaluator = pevaluator;
1091
1092 pxevaluator->pvstate = pstate;
1093 pxevaluator->pprocess_func = xeval_wrapping_eval_func;
1094 pxevaluator->pfree_func = xeval_wrapping_eval_free;
1095
1096 return pxevaluator;
1097 }
1098
1099 // ----------------------------------------------------------------
1100 // At callsites, arguments can be scalars or maps; return values can be scalars
1101 // or maps. At the user level, a function take map input and produce scalar
1102 // output or vice versa. As of this writing, though, *internally* functions go
1103 // from scalars to scalar or maps to map. This wrapper wraps maybe-map input to
1104 // functions which do not know about maps.
1105
1106 typedef struct _eval_wrapping_xeval_state_t {
1107 rxval_evaluator_t* pxevaluator;
1108 } eval_wrapping_xeval_state_t;
1109
eval_wrapping_xeval_func(void * pvstate,variables_t * pvars)1110 static mv_t eval_wrapping_xeval_func(void* pvstate, variables_t* pvars) {
1111 eval_wrapping_xeval_state_t* pstate = pvstate;
1112 rxval_evaluator_t* pxevaluator = pstate->pxevaluator;
1113 boxed_xval_t bxval = pxevaluator->pprocess_func(pxevaluator->pvstate, pvars);
1114
1115 if (bxval.xval.is_terminal) {
1116 if (bxval.is_ephemeral) {
1117 return bxval.xval.terminal_mlrval;
1118 } else {
1119 return mv_copy(&bxval.xval.terminal_mlrval);
1120 }
1121
1122 } else {
1123 if (bxval.is_ephemeral) {
1124 mlhmmv_xvalue_free(&bxval.xval);
1125 }
1126 return mv_error();
1127 }
1128
1129 }
1130
// Free function for the xval-to-scalar wrapper: frees the wrapped map-capable
// evaluator through its own free function, then the wrapper state, then the
// wrapper evaluator itself.
static void eval_wrapping_xeval_free(rval_evaluator_t* pevaluator) {
	eval_wrapping_xeval_state_t* pwrapper = pevaluator->pvstate;
	rxval_evaluator_t* pwrapped = pwrapper->pxevaluator;

	pwrapped->pfree_func(pwrapped);
	free(pwrapper);
	free(pevaluator);
}
1137
fmgr_alloc_eval_wrapping_xeval(rxval_evaluator_t * pxevaluator)1138 static rval_evaluator_t* fmgr_alloc_eval_wrapping_xeval(rxval_evaluator_t* pxevaluator) {
1139 rval_evaluator_t* pevaluator = mlr_malloc_or_die(sizeof(rval_evaluator_t));
1140
1141 eval_wrapping_xeval_state_t* pstate = mlr_malloc_or_die(sizeof(eval_wrapping_xeval_state_t));
1142 pstate->pxevaluator = pxevaluator;
1143
1144 pevaluator->pvstate = pstate;
1145 pevaluator->pprocess_func = eval_wrapping_xeval_func;
1146 pevaluator->pfree_func = eval_wrapping_xeval_free;
1147
1148 return pevaluator;
1149 }
1150
1151 // ================================================================
// Maps a variadic built-in function name ("min" or "max") to an evaluator over
// the given argument evaluators. Returns NULL for any other name.
static rval_evaluator_t* fmgr_alloc_evaluator_from_variadic_func_name(char* fnnm, rval_evaluator_t** pargs, int nargs) {
	if (streq(fnnm, "min"))
		return rval_evaluator_alloc_from_variadic_func(variadic_min_func, pargs, nargs);

	if (streq(fnnm, "max"))
		return rval_evaluator_alloc_from_variadic_func(variadic_max_func, pargs, nargs);

	return NULL;
}
1157
1158 // ================================================================
fmgr_alloc_evaluator_from_zary_func_name(char * function_name)1159 static rval_evaluator_t* fmgr_alloc_evaluator_from_zary_func_name(char* function_name) {
1160 if (streq(function_name, "urand")) {
1161 return rval_evaluator_alloc_from_x_z_func(f_z_urand_func);
1162 } else if (streq(function_name, "urand32")) {
1163 return rval_evaluator_alloc_from_x_z_func(i_z_urand32_func);
1164 } else if (streq(function_name, "systime")) {
1165 return rval_evaluator_alloc_from_x_z_func(f_z_systime_func);
1166 } else {
1167 return NULL;
1168 }
1169 }
1170
1171 // ================================================================
fmgr_alloc_evaluator_from_unary_func_name(char * fnnm,rval_evaluator_t * parg1)1172 static rval_evaluator_t* fmgr_alloc_evaluator_from_unary_func_name(char* fnnm, rval_evaluator_t* parg1) {
1173 if (streq(fnnm, "!")) { return rval_evaluator_alloc_from_b_b_func(b_b_not_func, parg1);
1174 } else if (streq(fnnm, "+")) { return rval_evaluator_alloc_from_x_x_func(x_x_upos_func, parg1);
1175 } else if (streq(fnnm, "-")) { return rval_evaluator_alloc_from_x_x_func(x_x_uneg_func, parg1);
1176 } else if (streq(fnnm, ".+")) { return rval_evaluator_alloc_from_x_x_func(x_x_upos_func, parg1);
1177 } else if (streq(fnnm, ".-")) { return rval_evaluator_alloc_from_x_x_func(x_x_uneg_func, parg1);
1178 } else if (streq(fnnm, "abs")) { return rval_evaluator_alloc_from_x_x_func(x_x_abs_func, parg1);
1179 } else if (streq(fnnm, "acos")) { return rval_evaluator_alloc_from_f_f_func(f_f_acos_func, parg1);
1180 } else if (streq(fnnm, "acosh")) { return rval_evaluator_alloc_from_f_f_func(f_f_acosh_func, parg1);
1181 } else if (streq(fnnm, "asin")) { return rval_evaluator_alloc_from_f_f_func(f_f_asin_func, parg1);
1182 } else if (streq(fnnm, "asinh")) { return rval_evaluator_alloc_from_f_f_func(f_f_asinh_func, parg1);
1183 } else if (streq(fnnm, "atan")) { return rval_evaluator_alloc_from_f_f_func(f_f_atan_func, parg1);
1184 } else if (streq(fnnm, "atanh")) { return rval_evaluator_alloc_from_f_f_func(f_f_atanh_func, parg1);
1185 } else if (streq(fnnm, "bitcount")) { return rval_evaluator_alloc_from_i_i_func(i_i_bitcount_func, parg1);
1186 } else if (streq(fnnm, "boolean")) { return rval_evaluator_alloc_from_x_x_func(b_x_boolean_func, parg1);
1187 } else if (streq(fnnm, "cbrt")) { return rval_evaluator_alloc_from_f_f_func(f_f_cbrt_func, parg1);
1188 } else if (streq(fnnm, "ceil")) { return rval_evaluator_alloc_from_x_x_func(x_x_ceil_func, parg1);
1189 } else if (streq(fnnm, "cos")) { return rval_evaluator_alloc_from_f_f_func(f_f_cos_func, parg1);
1190 } else if (streq(fnnm, "cosh")) { return rval_evaluator_alloc_from_f_f_func(f_f_cosh_func, parg1);
1191 } else if (streq(fnnm, "dhms2fsec")) { return rval_evaluator_alloc_from_f_s_func(f_s_dhms2fsec_func, parg1);
1192 } else if (streq(fnnm, "dhms2sec")) { return rval_evaluator_alloc_from_f_s_func(i_s_dhms2sec_func, parg1);
1193 } else if (streq(fnnm, "erf")) { return rval_evaluator_alloc_from_f_f_func(f_f_erf_func, parg1);
1194 } else if (streq(fnnm, "erfc")) { return rval_evaluator_alloc_from_f_f_func(f_f_erfc_func, parg1);
1195 } else if (streq(fnnm, "exp")) { return rval_evaluator_alloc_from_f_f_func(f_f_exp_func, parg1);
1196 } else if (streq(fnnm, "expm1")) { return rval_evaluator_alloc_from_f_f_func(f_f_expm1_func, parg1);
1197 } else if (streq(fnnm, "float")) { return rval_evaluator_alloc_from_x_x_func(f_x_float_func, parg1);
1198 } else if (streq(fnnm, "floor")) { return rval_evaluator_alloc_from_x_x_func(x_x_floor_func, parg1);
1199 } else if (streq(fnnm, "fsec2dhms")) { return rval_evaluator_alloc_from_s_f_func(s_f_fsec2dhms_func, parg1);
1200 } else if (streq(fnnm, "fsec2hms")) { return rval_evaluator_alloc_from_s_f_func(s_f_fsec2hms_func, parg1);
1201 } else if (streq(fnnm, "gmt2sec")) { return rval_evaluator_alloc_from_i_s_func(i_s_gmt2sec_func, parg1);
1202 } else if (streq(fnnm, "localtime2sec")) { return rval_evaluator_alloc_from_i_s_func(i_s_localtime2sec_func, parg1);
1203 } else if (streq(fnnm, "hexfmt")) { return rval_evaluator_alloc_from_x_x_func(s_x_hexfmt_func, parg1);
1204 } else if (streq(fnnm, "hms2fsec")) { return rval_evaluator_alloc_from_f_s_func(f_s_hms2fsec_func, parg1);
1205 } else if (streq(fnnm, "hms2sec")) { return rval_evaluator_alloc_from_f_s_func(i_s_hms2sec_func, parg1);
1206 } else if (streq(fnnm, "int")) { return rval_evaluator_alloc_from_x_x_func(i_x_int_func, parg1);
1207 } else if (streq(fnnm, "invqnorm")) { return rval_evaluator_alloc_from_f_f_func(f_f_invqnorm_func, parg1);
1208 } else if (streq(fnnm, "log")) { return rval_evaluator_alloc_from_f_f_func(f_f_log_func, parg1);
1209 } else if (streq(fnnm, "log10")) { return rval_evaluator_alloc_from_f_f_func(f_f_log10_func, parg1);
1210 } else if (streq(fnnm, "log1p")) { return rval_evaluator_alloc_from_f_f_func(f_f_log1p_func, parg1);
1211 } else if (streq(fnnm, "qnorm")) { return rval_evaluator_alloc_from_f_f_func(f_f_qnorm_func, parg1);
1212 } else if (streq(fnnm, "round")) { return rval_evaluator_alloc_from_x_x_func(x_x_round_func, parg1);
1213 } else if (streq(fnnm, "sec2dhms")) { return rval_evaluator_alloc_from_s_i_func(s_i_sec2dhms_func, parg1);
1214 } else if (streq(fnnm, "sec2gmt")) { return rval_evaluator_alloc_from_x_x_func(s_x_sec2gmt_func, parg1);
1215 } else if (streq(fnnm, "sec2gmtdate")) { return rval_evaluator_alloc_from_x_x_func(s_x_sec2gmtdate_func, parg1);
1216 } else if (streq(fnnm, "sec2localtime")) { return rval_evaluator_alloc_from_x_x_func(s_x_sec2localtime_func, parg1);
1217 } else if (streq(fnnm, "sec2localdate")) { return rval_evaluator_alloc_from_x_x_func(s_x_sec2localdate_func, parg1);
1218 } else if (streq(fnnm, "sec2hms")) { return rval_evaluator_alloc_from_s_i_func(s_i_sec2hms_func, parg1);
1219 } else if (streq(fnnm, "sgn")) { return rval_evaluator_alloc_from_x_x_func(x_x_sgn_func, parg1);
1220 } else if (streq(fnnm, "sin")) { return rval_evaluator_alloc_from_f_f_func(f_f_sin_func, parg1);
1221 } else if (streq(fnnm, "sinh")) { return rval_evaluator_alloc_from_f_f_func(f_f_sinh_func, parg1);
1222 } else if (streq(fnnm, "sqrt")) { return rval_evaluator_alloc_from_f_f_func(f_f_sqrt_func, parg1);
1223 } else if (streq(fnnm, "string")) { return rval_evaluator_alloc_from_x_x_func(s_x_string_func, parg1);
1224 } else if (streq(fnnm, "strlen")) { return rval_evaluator_alloc_from_i_s_func(i_s_strlen_func, parg1);
1225 } else if (streq(fnnm, "tan")) { return rval_evaluator_alloc_from_f_f_func(f_f_tan_func, parg1);
1226 } else if (streq(fnnm, "tanh")) { return rval_evaluator_alloc_from_f_f_func(f_f_tanh_func, parg1);
1227 } else if (streq(fnnm, "tolower")) { return rval_evaluator_alloc_from_s_s_func(s_s_tolower_func, parg1);
1228 } else if (streq(fnnm, "toupper")) { return rval_evaluator_alloc_from_s_s_func(s_s_toupper_func, parg1);
1229 } else if (streq(fnnm, "capitalize")) { return rval_evaluator_alloc_from_s_s_func(s_s_capitalize_func, parg1);
1230 } else if (streq(fnnm, "system")) { return rval_evaluator_alloc_from_s_s_func(s_s_system_func, parg1);
1231 } else if (streq(fnnm, "lstrip")) { return rval_evaluator_alloc_from_s_s_func(s_s_lstrip_func, parg1);
1232 } else if (streq(fnnm, "rstrip")) { return rval_evaluator_alloc_from_s_s_func(s_s_rstrip_func, parg1);
1233 } else if (streq(fnnm, "strip")) { return rval_evaluator_alloc_from_s_s_func(s_s_strip_func, parg1);
1234 } else if (streq(fnnm, "collapse_whitespace")) { return rval_evaluator_alloc_from_s_s_func(s_s_collapse_whitespace_func, parg1);
1235 } else if (streq(fnnm, "clean_whitespace")) { return rval_evaluator_alloc_from_s_s_func(s_s_clean_whitespace_func, parg1);
1236 } else if (streq(fnnm, "~")) { return rval_evaluator_alloc_from_i_i_func(i_i_bitwise_not_func, parg1);
1237
1238 } else return NULL;
1239 }
1240
1241 // ================================================================
// Looks up fnnm among the two-argument builtin operators and functions listed
// below and returns the evaluator produced by the matching
// rval_evaluator_alloc_from_* helper, which is handed parg1 and parg2.
// Returns NULL when fnnm matches none of the names.
static rval_evaluator_t* fmgr_alloc_evaluator_from_binary_func_name(char* fnnm,
	rval_evaluator_t* parg1, rval_evaluator_t* parg2)
{
	// Boolean connectives have dedicated allocators taking only the arguments.
	if (streq(fnnm, "&&")) {
		return rval_evaluator_alloc_from_b_bb_and_func(parg1, parg2);
	}
	if (streq(fnnm, "||")) {
		return rval_evaluator_alloc_from_b_bb_or_func(parg1, parg2);
	}
	if (streq(fnnm, "^^")) {
		return rval_evaluator_alloc_from_b_bb_xor_func(parg1, parg2);
	}

	// Regex operations using the *_no_precomp_func variants: the second
	// argument is passed as an evaluator, not as a regex string.
	if (streq(fnnm, "=~")) {
		return rval_evaluator_alloc_from_x_ssc_func(matches_no_precomp_func, parg1, parg2);
	}
	if (streq(fnnm, "regextract")) {
		return rval_evaluator_alloc_from_x_ss_func(regextract_no_precomp_func, parg1, parg2);
	}
	if (streq(fnnm, "!=~")) {
		return rval_evaluator_alloc_from_x_ssc_func(does_not_match_no_precomp_func, parg1, parg2);
	}

	// Comparison operators.
	if (streq(fnnm, "==")) {
		return rval_evaluator_alloc_from_x_xx_func(eq_op_func, parg1, parg2);
	}
	if (streq(fnnm, "!=")) {
		return rval_evaluator_alloc_from_x_xx_func(ne_op_func, parg1, parg2);
	}
	if (streq(fnnm, ">")) {
		return rval_evaluator_alloc_from_x_xx_func(gt_op_func, parg1, parg2);
	}
	if (streq(fnnm, ">=")) {
		return rval_evaluator_alloc_from_x_xx_func(ge_op_func, parg1, parg2);
	}
	if (streq(fnnm, "<")) {
		return rval_evaluator_alloc_from_x_xx_func(lt_op_func, parg1, parg2);
	}
	if (streq(fnnm, "<=")) {
		return rval_evaluator_alloc_from_x_xx_func(le_op_func, parg1, parg2);
	}

	// Dot operator.
	if (streq(fnnm, ".")) {
		return rval_evaluator_alloc_from_x_xx_func(s_xx_dot_func, parg1, parg2);
	}

	// Arithmetic operators.
	if (streq(fnnm, "+")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_plus_func, parg1, parg2);
	}
	if (streq(fnnm, "-")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_minus_func, parg1, parg2);
	}
	if (streq(fnnm, "*")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_times_func, parg1, parg2);
	}
	if (streq(fnnm, "/")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_divide_func, parg1, parg2);
	}
	if (streq(fnnm, "//")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_int_divide_func, parg1, parg2);
	}

	// Dot-prefixed arithmetic operators.
	if (streq(fnnm, ".+")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_oplus_func, parg1, parg2);
	}
	if (streq(fnnm, ".-")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_ominus_func, parg1, parg2);
	}
	if (streq(fnnm, ".*")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_otimes_func, parg1, parg2);
	}
	if (streq(fnnm, "./")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_odivide_func, parg1, parg2);
	}
	if (streq(fnnm, ".//")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_int_odivide_func, parg1, parg2);
	}

	// Remaining math operators and functions; "**" and "pow" share one
	// implementation.
	if (streq(fnnm, "%")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_mod_func, parg1, parg2);
	}
	if (streq(fnnm, "**")) {
		return rval_evaluator_alloc_from_f_ff_func(f_ff_pow_func, parg1, parg2);
	}
	if (streq(fnnm, "pow")) {
		return rval_evaluator_alloc_from_f_ff_func(f_ff_pow_func, parg1, parg2);
	}
	if (streq(fnnm, "atan2")) {
		return rval_evaluator_alloc_from_f_ff_func(f_ff_atan2_func, parg1, parg2);
	}
	if (streq(fnnm, "roundm")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_roundm_func, parg1, parg2);
	}
	if (streq(fnnm, "fmtnum")) {
		return rval_evaluator_alloc_from_s_xs_func(s_xs_fmtnum_func, parg1, parg2);
	}
	if (streq(fnnm, "urandint")) {
		return rval_evaluator_alloc_from_i_ii_func(i_ii_urandint_func, parg1, parg2);
	}

	// Two-argument forms of sec2gmt and sec2localtime.
	if (streq(fnnm, "sec2gmt")) {
		return rval_evaluator_alloc_from_x_xi_func(s_xi_sec2gmt_func, parg1, parg2);
	}
	if (streq(fnnm, "sec2localtime")) {
		return rval_evaluator_alloc_from_x_xi_func(s_xi_sec2localtime_func, parg1, parg2);
	}

	// Bitwise operators.
	if (streq(fnnm, "&")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_band_func, parg1, parg2);
	}
	if (streq(fnnm, "|")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_bor_func, parg1, parg2);
	}
	if (streq(fnnm, "^")) {
		return rval_evaluator_alloc_from_x_xx_func(x_xx_bxor_func, parg1, parg2);
	}
	if (streq(fnnm, "<<")) {
		return rval_evaluator_alloc_from_i_ii_func(i_ii_bitwise_lsh_func, parg1, parg2);
	}
	if (streq(fnnm, ">>")) {
		return rval_evaluator_alloc_from_i_ii_func(i_ii_bitwise_rsh_func, parg1, parg2);
	}

	// Time formatting and parsing.
	if (streq(fnnm, "strftime")) {
		return rval_evaluator_alloc_from_x_ns_func(s_ns_strftime_func, parg1, parg2);
	}
	if (streq(fnnm, "strftime_local")) {
		return rval_evaluator_alloc_from_x_ns_func(s_ns_strftime_local_func, parg1, parg2);
	}
	if (streq(fnnm, "strptime")) {
		return rval_evaluator_alloc_from_x_ss_func(i_ss_strptime_func, parg1, parg2);
	}
	if (streq(fnnm, "strptime_local")) {
		return rval_evaluator_alloc_from_x_ss_func(i_ss_strptime_local_func, parg1, parg2);
	}

	if (streq(fnnm, "urandrange")) {
		return rval_evaluator_alloc_from_f_ff_func(f_ff_urandrange_func, parg1, parg2);
	}
	if (streq(fnnm, "truncate")) {
		return rval_evaluator_alloc_from_s_si_func(s_si_truncate_func, parg1, parg2);
	}

	// Not a name handled here.
	return NULL;
}
1295
// Variant of the binary dispatcher for callsites whose second argument is
// supplied as a regex string plus an ignore-case flag rather than as an
// evaluator.  Handles "=~", "!=~", and "regextract" via the *_precomp_func
// implementations; returns NULL for any other name.
static rval_evaluator_t* fmgr_alloc_evaluator_from_binary_regex_arg2_func_name(char* fnnm,
	rval_evaluator_t* parg1, char* regex_string, int ignore_case)
{
	rval_evaluator_t* presult = NULL;

	if (streq(fnnm, "=~")) {
		presult = rval_evaluator_alloc_from_x_sr_func(matches_precomp_func,
			parg1, regex_string, ignore_case);
	} else if (streq(fnnm, "!=~")) {
		presult = rval_evaluator_alloc_from_x_sr_func(does_not_match_precomp_func,
			parg1, regex_string, ignore_case);
	} else if (streq(fnnm, "regextract")) {
		presult = rval_evaluator_alloc_from_x_se_func(regextract_precomp_func,
			parg1, regex_string, ignore_case);
	}

	return presult;
}
1307
1308 // ================================================================
// Looks up fnnm among the three-argument builtin functions (and the "? :"
// operator) and returns the evaluator produced by the matching allocator,
// which is handed parg1, parg2 and parg3.  Returns NULL when fnnm matches none
// of the names.
static rval_evaluator_t* fmgr_alloc_evaluator_from_ternary_func_name(char* fnnm,
	rval_evaluator_t* parg1, rval_evaluator_t* parg2, rval_evaluator_t* parg3)
{
	// Substitution/extraction functions; the regex variants here are the
	// *_no_precomp_func ones, with all three arguments passed as evaluators.
	if (streq(fnnm, "sub")) {
		return rval_evaluator_alloc_from_s_sss_func(sub_no_precomp_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "gsub")) {
		return rval_evaluator_alloc_from_s_sss_func(gsub_no_precomp_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "ssub")) {
		return rval_evaluator_alloc_from_s_sss_func(s_sss_ssub_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "regextract_or_else")) {
		return rval_evaluator_alloc_from_s_sss_func(regextract_or_else_no_precomp_func, parg1, parg2, parg3);
	}

	if (streq(fnnm, "logifit")) {
		return rval_evaluator_alloc_from_f_fff_func(f_fff_logifit_func, parg1, parg2, parg3);
	}

	// Modular-arithmetic functions.
	if (streq(fnnm, "madd")) {
		return rval_evaluator_alloc_from_i_iii_func(i_iii_modadd_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "msub")) {
		return rval_evaluator_alloc_from_i_iii_func(i_iii_modsub_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "mmul")) {
		return rval_evaluator_alloc_from_i_iii_func(i_iii_modmul_func, parg1, parg2, parg3);
	}
	if (streq(fnnm, "mexp")) {
		return rval_evaluator_alloc_from_i_iii_func(i_iii_modexp_func, parg1, parg2, parg3);
	}

	if (streq(fnnm, "substr")) {
		return rval_evaluator_alloc_from_s_sii_func(s_sii_substr_func, parg1, parg2, parg3);
	}

	// The ternary operator has its own allocator taking only the arguments.
	if (streq(fnnm, "? :")) {
		return rval_evaluator_alloc_from_ternop(parg1, parg2, parg3);
	}

	return NULL;
}
1336
// Variant of the ternary dispatcher for callsites whose second argument is
// supplied as a regex string plus an ignore-case flag rather than as an
// evaluator.  Handles "sub", "gsub", and "regextract_or_else" via the
// *_precomp_func implementations; returns NULL for any other name.
static rval_evaluator_t* fmgr_alloc_evaluator_from_ternary_regex_arg2_func_name(char* fnnm,
	rval_evaluator_t* parg1, char* regex_string, int ignore_case, rval_evaluator_t* parg3)
{
	rval_evaluator_t* presult = NULL;

	if (streq(fnnm, "sub")) {
		presult = rval_evaluator_alloc_from_x_srs_func(sub_precomp_func,
			parg1, regex_string, ignore_case, parg3);
	} else if (streq(fnnm, "gsub")) {
		presult = rval_evaluator_alloc_from_x_srs_func(gsub_precomp_func,
			parg1, regex_string, ignore_case, parg3);
	} else if (streq(fnnm, "regextract_or_else")) {
		presult = rval_evaluator_alloc_from_x_ses_func(regextract_or_else_precomp_func,
			parg1, regex_string, ignore_case, parg3);
	}

	return presult;
}
1348
1349 // ================================================================
construct_builtin_function_callsite_xevaluator(fmgr_t * pfmgr,unresolved_func_callsite_state_t * pcallsite)1350 static rxval_evaluator_t* construct_builtin_function_callsite_xevaluator(
1351 fmgr_t* pfmgr,
1352 unresolved_func_callsite_state_t* pcallsite)
1353 {
1354 char* function_name = pcallsite->function_name;
1355 int user_provided_arity = pcallsite->arity;
1356 int type_inferencing = pcallsite->type_inferencing;
1357 int context_flags = pcallsite->context_flags;
1358 mlr_dsl_ast_node_t* pnode = pcallsite->pnode;
1359
1360 int variadic = FALSE;
1361 fmgr_check_arity_with_report(pfmgr, function_name, user_provided_arity, &variadic);
1362
1363 rxval_evaluator_t* pxevaluator = NULL;
1364 if (variadic) {
1365 pxevaluator = fmgr_alloc_xevaluator_from_variadic_func_name(function_name, pnode->pchildren,
1366 pfmgr, type_inferencing, context_flags);
1367
1368 } else if (user_provided_arity == 1) {
1369 mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
1370 pxevaluator = fmgr_alloc_xevaluator_from_unary_func_name(function_name, parg1_node,
1371 pfmgr, type_inferencing, context_flags);
1372
1373 } else if (user_provided_arity == 2) {
1374 mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
1375 mlr_dsl_ast_node_t* parg2_node = pnode->pchildren->phead->pnext->pvvalue;
1376 pxevaluator = fmgr_alloc_xevaluator_from_binary_func_name(function_name, parg1_node, parg2_node,
1377 pfmgr, type_inferencing, context_flags);
1378
1379 } else if (user_provided_arity == 3) {
1380 mlr_dsl_ast_node_t* parg1_node = pnode->pchildren->phead->pvvalue;
1381 mlr_dsl_ast_node_t* parg2_node = pnode->pchildren->phead->pnext->pvvalue;
1382 mlr_dsl_ast_node_t* parg3_node = pnode->pchildren->phead->pnext->pnext->pvvalue;
1383 pxevaluator = fmgr_alloc_xevaluator_from_ternary_func_name(function_name, parg1_node, parg2_node, parg3_node,
1384 pfmgr, type_inferencing, context_flags);
1385 }
1386
1387 return pxevaluator;
1388 }
1389
1390 // ----------------------------------------------------------------
fmgr_alloc_xevaluator_from_variadic_func_name(char * function_name,sllv_t * parg_nodes,fmgr_t * pfmgr,int type_inferencing,int context_flags)1391 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_variadic_func_name(
1392 char* function_name,
1393 sllv_t* parg_nodes,
1394 fmgr_t* pfmgr,
1395 int type_inferencing,
1396 int context_flags)
1397 {
1398 if (streq(function_name, "mapsum")) {
1399 return rxval_evaluator_alloc_from_variadic_func(variadic_mapsum_xfunc, parg_nodes,
1400 pfmgr, type_inferencing, context_flags);
1401 } else if (streq(function_name, "mapdiff")) {
1402 return rxval_evaluator_alloc_from_variadic_func(variadic_mapdiff_xfunc, parg_nodes,
1403 pfmgr, type_inferencing, context_flags);
1404 } else if (streq(function_name, "mapexcept")) {
1405 return rxval_evaluator_alloc_from_variadic_func(variadic_mapexcept_xfunc, parg_nodes,
1406 pfmgr, type_inferencing, context_flags);
1407 } else if (streq(function_name, "mapselect")) {
1408 return rxval_evaluator_alloc_from_variadic_func(variadic_mapselect_xfunc, parg_nodes,
1409 pfmgr, type_inferencing, context_flags);
1410 } else {
1411 return NULL;
1412 }
1413 }
1414
1415 // ----------------------------------------------------------------
fmgr_alloc_xevaluator_from_unary_func_name(char * fnnm,mlr_dsl_ast_node_t * parg1,fmgr_t * pf,int ti,int cf)1416 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_unary_func_name(char* fnnm,
1417 mlr_dsl_ast_node_t* parg1,
1418 fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/)
1419 {
1420
1421 if (streq(fnnm, "asserting_absent")) {
1422 return rxval_evaluator_alloc_from_A_x_func(b_x_is_absent_no_free_xfunc, parg1, pf, ti, cf, "absent");
1423 } else if (streq(fnnm, "asserting_bool")) {
1424 return rxval_evaluator_alloc_from_A_x_func(b_x_is_boolean_no_free_xfunc, parg1, pf, ti, cf, "boolean");
1425 } else if (streq(fnnm, "asserting_boolean")) {
1426 return rxval_evaluator_alloc_from_A_x_func(b_x_is_boolean_no_free_xfunc, parg1, pf, ti, cf, "boolean");
1427 } else if (streq(fnnm, "asserting_empty")) {
1428 return rxval_evaluator_alloc_from_A_x_func(b_x_is_empty_no_free_xfunc, parg1, pf, ti, cf, "empty");
1429 } else if (streq(fnnm, "asserting_empty_map")) {
1430 return rxval_evaluator_alloc_from_A_x_func(b_x_is_empty_map_no_free_xfunc, parg1, pf, ti, cf, "empty_map");
1431 } else if (streq(fnnm, "asserting_float")) {
1432 return rxval_evaluator_alloc_from_A_x_func(b_x_is_float_no_free_xfunc, parg1, pf, ti, cf, "float");
1433 } else if (streq(fnnm, "asserting_int")) {
1434 return rxval_evaluator_alloc_from_A_x_func(b_x_is_int_no_free_xfunc, parg1, pf, ti, cf, "int");
1435 } else if (streq(fnnm, "asserting_map")) {
1436 return rxval_evaluator_alloc_from_A_x_func(b_x_is_map_no_free_xfunc, parg1, pf, ti, cf, "map");
1437 } else if (streq(fnnm, "asserting_nonempty_map")) {
1438 return rxval_evaluator_alloc_from_A_x_func(b_x_is_nonempty_map_no_free_xfunc, parg1, pf, ti, cf,
1439 "nonempty_map");
1440 } else if (streq(fnnm, "asserting_not_empty")) {
1441 return rxval_evaluator_alloc_from_A_x_func(b_x_is_not_empty_no_free_xfunc, parg1, pf, ti, cf, "not_empty");
1442 } else if (streq(fnnm, "asserting_not_map")) {
1443 return rxval_evaluator_alloc_from_A_x_func(b_x_is_not_map_no_free_xfunc, parg1, pf, ti, cf, "not_map");
1444 } else if (streq(fnnm, "asserting_not_null")) {
1445 return rxval_evaluator_alloc_from_A_x_func(b_x_is_not_null_no_free_xfunc, parg1, pf, ti, cf, "not_null");
1446 } else if (streq(fnnm, "asserting_null")) {
1447 return rxval_evaluator_alloc_from_A_x_func(b_x_is_null_no_free_xfunc, parg1, pf, ti, cf, "null");
1448 } else if (streq(fnnm, "asserting_numeric")) {
1449 return rxval_evaluator_alloc_from_A_x_func(b_x_is_numeric_no_free_xfunc, parg1, pf, ti, cf, "numeric");
1450 } else if (streq(fnnm, "asserting_present")) {
1451 return rxval_evaluator_alloc_from_A_x_func(b_x_is_present_no_free_xfunc, parg1, pf, ti, cf, "present");
1452 } else if (streq(fnnm, "asserting_string")) {
1453 return rxval_evaluator_alloc_from_A_x_func(b_x_is_string_no_free_xfunc, parg1, pf, ti, cf, "string");
1454
1455 } else if (streq(fnnm, "is_absent")) {
1456 return rxval_evaluator_alloc_from_x_x_func(b_x_is_absent_xfunc, parg1, pf, ti, cf);
1457 } else if (streq(fnnm, "is_bool")) {
1458 return rxval_evaluator_alloc_from_x_x_func(b_x_is_boolean_xfunc, parg1, pf, ti, cf);
1459 } else if (streq(fnnm, "is_boolean")) {
1460 return rxval_evaluator_alloc_from_x_x_func(b_x_is_boolean_xfunc, parg1, pf, ti, cf);
1461 } else if (streq(fnnm, "is_empty")) {
1462 return rxval_evaluator_alloc_from_x_x_func(b_x_is_empty_xfunc, parg1, pf, ti, cf);
1463 } else if (streq(fnnm, "is_empty_map")) {
1464 return rxval_evaluator_alloc_from_x_x_func(b_x_is_empty_map_xfunc, parg1, pf, ti, cf);
1465 } else if (streq(fnnm, "is_float")) {
1466 return rxval_evaluator_alloc_from_x_x_func(b_x_is_float_xfunc, parg1, pf, ti, cf);
1467 } else if (streq(fnnm, "is_int")) {
1468 return rxval_evaluator_alloc_from_x_x_func(b_x_is_int_xfunc, parg1, pf, ti, cf);
1469 } else if (streq(fnnm, "is_map")) {
1470 return rxval_evaluator_alloc_from_x_x_func(b_x_is_map_xfunc, parg1, pf, ti, cf);
1471 } else if (streq(fnnm, "is_nonempty_map")) {
1472 return rxval_evaluator_alloc_from_x_x_func(b_x_is_nonempty_map_xfunc, parg1, pf, ti, cf);
1473 } else if (streq(fnnm, "is_not_empty")) {
1474 return rxval_evaluator_alloc_from_x_x_func(b_x_is_not_empty_xfunc, parg1, pf, ti, cf);
1475 } else if (streq(fnnm, "is_not_map")) {
1476 return rxval_evaluator_alloc_from_x_x_func(b_x_is_not_map_xfunc, parg1, pf, ti, cf);
1477 } else if (streq(fnnm, "is_not_null")) {
1478 return rxval_evaluator_alloc_from_x_x_func(b_x_is_not_null_xfunc, parg1, pf, ti, cf);
1479 } else if (streq(fnnm, "is_null")) {
1480 return rxval_evaluator_alloc_from_x_x_func(b_x_is_null_xfunc, parg1, pf, ti, cf);
1481 } else if (streq(fnnm, "is_numeric")) {
1482 return rxval_evaluator_alloc_from_x_x_func(b_x_is_numeric_xfunc, parg1, pf, ti, cf);
1483 } else if (streq(fnnm, "is_present")) {
1484 return rxval_evaluator_alloc_from_x_x_func(b_x_is_present_xfunc, parg1, pf, ti, cf);
1485 } else if (streq(fnnm, "is_string")) {
1486 return rxval_evaluator_alloc_from_x_x_func(b_x_is_string_xfunc, parg1, pf, ti, cf);
1487
1488 } else if (streq(fnnm, "typeof")) {
1489 return rxval_evaluator_alloc_from_x_x_func(s_x_typeof_xfunc, parg1, pf, ti, cf);
1490 } else if (streq(fnnm, "length")) {
1491 return rxval_evaluator_alloc_from_x_x_func(i_x_length_xfunc, parg1, pf, ti, cf);
1492 } else if (streq(fnnm, "depth")) {
1493 return rxval_evaluator_alloc_from_x_x_func(i_x_depth_xfunc, parg1, pf, ti, cf);
1494 } else if (streq(fnnm, "leafcount")) {
1495 return rxval_evaluator_alloc_from_x_x_func(i_x_leafcount_xfunc, parg1, pf, ti, cf);
1496 } else {
1497 return NULL;
1498 }
1499 }
1500
1501 // ----------------------------------------------------------------
fmgr_alloc_xevaluator_from_binary_func_name(char * fnnm,mlr_dsl_ast_node_t * parg1,mlr_dsl_ast_node_t * parg2,fmgr_t * pf,int ti,int cf)1502 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_binary_func_name(char* fnnm,
1503 mlr_dsl_ast_node_t* parg1, mlr_dsl_ast_node_t* parg2,
1504 fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/)
1505 {
1506 if (streq(fnnm, "haskey")) {
1507 return rxval_evaluator_alloc_from_x_mx_func(b_xx_haskey_xfunc, parg1, parg2, pf, ti, cf);
1508 } else if (streq(fnnm, "splitnv")) {
1509 return rxval_evaluator_alloc_from_x_ss_func(m_ss_splitnv_xfunc, parg1, parg2, pf, ti, cf);
1510 } else if (streq(fnnm, "splitnvx")) {
1511 return rxval_evaluator_alloc_from_x_ss_func(m_ss_splitnvx_xfunc, parg1, parg2, pf, ti, cf);
1512 } else if (streq(fnnm, "joink")) {
1513 return rxval_evaluator_alloc_from_x_ms_func(s_ms_joink_xfunc, parg1, parg2, pf, ti, cf);
1514 } else if (streq(fnnm, "joinv")) {
1515 return rxval_evaluator_alloc_from_x_ms_func(s_ms_joinv_xfunc, parg1, parg2, pf, ti, cf);
1516 } else {
1517 return NULL;
1518 }
1519 }
1520
1521 // ----------------------------------------------------------------
fmgr_alloc_xevaluator_from_ternary_func_name(char * fnnm,mlr_dsl_ast_node_t * parg1,mlr_dsl_ast_node_t * parg2,mlr_dsl_ast_node_t * parg3,fmgr_t * pf,int ti,int cf)1522 static rxval_evaluator_t* fmgr_alloc_xevaluator_from_ternary_func_name(char* fnnm,
1523 mlr_dsl_ast_node_t* parg1, mlr_dsl_ast_node_t* parg2, mlr_dsl_ast_node_t* parg3,
1524 fmgr_t* pf, int ti /*type_inferencing*/, int cf /*context_flags*/)
1525 {
1526 if (streq(fnnm, "joinkv")) {
1527 return rxval_evaluator_alloc_from_x_mss_func(s_mss_joinkv_xfunc, parg1, parg2, parg3, pf, ti, cf);
1528 } else if (streq(fnnm, "splitkv")) {
1529 return rxval_evaluator_alloc_from_x_sss_func(m_sss_splitkv_xfunc, parg1, parg2, parg3, pf, ti, cf);
1530 } else if (streq(fnnm, "splitkvx")) {
1531 return rxval_evaluator_alloc_from_x_sss_func(m_sss_splitkvx_xfunc, parg1, parg2, parg3, pf, ti, cf);
1532 } else {
1533 return NULL;
1534 }
1535 }
1536
1537 // ================================================================
1538 // Return value is in scalar context.
// Resolves an unresolved function callsite whose return value is used in
// scalar context, overwriting *pev in place with the resolved evaluator.
// Candidates are tried in this order:
//   1. a user-defined function;
//   2. a builtin xevaluator (at least one argument and/or the return value is
//      a map), wrapped so that it presents as a scalar evaluator;
//   3. a builtin scalar evaluator.
// If none applies, an error naming the function is written to stderr and the
// process exits with status 1.
static void resolve_func_callsite(fmgr_t* pfmgr, rval_evaluator_t* pev) {
	unresolved_func_callsite_state_t* pcallsite = pev->pvstate;

	rval_evaluator_t* presolved = construct_udf_callsite_evaluator(pfmgr, pcallsite);

	if (presolved == NULL) {
		rxval_evaluator_t* pxresolved = construct_builtin_function_callsite_xevaluator(pfmgr, pcallsite);
		if (pxresolved != NULL) {
			// The wrapper's result is used without a NULL check.
			presolved = fmgr_alloc_eval_wrapping_xeval(pxresolved);
		} else {
			presolved = construct_builtin_function_callsite_evaluator(pfmgr, pcallsite);
			if (presolved == NULL) {
				fprintf(stderr, "Miller: unrecognized function name \"%s\".\n", pcallsite->function_name);
				exit(1);
			}
		}
	}

	// Struct assignment into the callsite space; only the temporary shell
	// returned by the constructor is freed.
	*pev = *presolved;
	free(presolved);
}
1573
1574 // ----------------------------------------------------------------
1575 // Return value is in map context.
// Resolves an unresolved function callsite whose return value is used in map
// context, overwriting *pxev in place with the resolved xevaluator.
// Candidates are tried in this order:
//   1. a user-defined function;
//   2. a builtin xevaluator;
//   3. a builtin scalar evaluator, wrapped so that it presents as an
//      xevaluator.
// If none applies, an error naming the function is written to stderr and the
// process exits with status 1.
static void resolve_func_xcallsite(fmgr_t* pfmgr, rxval_evaluator_t* pxev) {
	unresolved_func_callsite_state_t* pcallsite = pxev->pvstate;

	rxval_evaluator_t* pxevaluator = construct_udf_defsite_xevaluator(pfmgr, pcallsite);
	if (pxevaluator != NULL) {
		// Struct assignment into the callsite space; only the temporary
		// shell returned by the constructor is freed.
		*pxev = *pxevaluator;
		free(pxevaluator);
		return;
	}

	pxevaluator = construct_builtin_function_callsite_xevaluator(pfmgr, pcallsite);
	if (pxevaluator != NULL) {
		*pxev = *pxevaluator;
		free(pxevaluator);
		return;
	}

	// Wrap the scalar evaluator only if one was actually found, mirroring the
	// NULL check resolve_func_callsite does before its own wrapping step.
	// Without this check a failed lookup could be wrapped anyway, making the
	// error report below unreachable.
	// NOTE(review): whether fmgr_alloc_xeval_wrapping_eval tolerates a NULL
	// argument is not visible here; the guard makes that irrelevant.
	rval_evaluator_t* pevaluator = construct_builtin_function_callsite_evaluator(pfmgr, pcallsite);
	if (pevaluator != NULL) {
		pxevaluator = fmgr_alloc_xeval_wrapping_eval(pevaluator);
		if (pxevaluator != NULL) {
			*pxev = *pxevaluator;
			free(pxevaluator);
			return;
		}
	}

	fprintf(stderr, "Miller: unrecognized function name \"%s\".\n", pcallsite->function_name);
	exit(1);
}
1605