1 #ifndef MVFUNCS_H
2 #define MVFUNCS_H
3 
4 // ================================================================
5 // Functions involving mlrvals: primarily for the DSL but also for
6 // stats1/stats2, etc.
7 // ================================================================
8 
9 #include "../lib/mlrmath.h"
10 #include "../lib/mlrutil.h"
11 #include "../lib/mlrdatetime.h"
12 #include "../lib/mtrand.h"
13 #include "../lib/string_builder.h"
14 #include "../lib/string_array.h"
15 #include "../lib/mlrval.h"
16 
17 #define MV_SB_ALLOC_LENGTH 32
18 
19 #define ISO8601_TIME_FORMAT   "%Y-%m-%dT%H:%M:%SZ"
20 #define ISO8601_TIME_FORMAT_1 "%Y-%m-%dT%H:%M:%1SZ"
21 #define ISO8601_TIME_FORMAT_2 "%Y-%m-%dT%H:%M:%2SZ"
22 #define ISO8601_TIME_FORMAT_3 "%Y-%m-%dT%H:%M:%3SZ"
23 #define ISO8601_TIME_FORMAT_4 "%Y-%m-%dT%H:%M:%4SZ"
24 #define ISO8601_TIME_FORMAT_5 "%Y-%m-%dT%H:%M:%5SZ"
25 #define ISO8601_TIME_FORMAT_6 "%Y-%m-%dT%H:%M:%6SZ"
26 #define ISO8601_TIME_FORMAT_7 "%Y-%m-%dT%H:%M:%7SZ"
27 #define ISO8601_TIME_FORMAT_8 "%Y-%m-%dT%H:%M:%8SZ"
28 #define ISO8601_TIME_FORMAT_9 "%Y-%m-%dT%H:%M:%9SZ"
29 #define ISO8601_DATE_FORMAT   "%Y-%m-%d"
30 
31 #define ISO8601_LOCAL_TIME_FORMAT   "%Y-%m-%d %H:%M:%S"
32 #define ISO8601_LOCAL_TIME_FORMAT_1 "%Y-%m-%d %H:%M:%1S"
33 #define ISO8601_LOCAL_TIME_FORMAT_2 "%Y-%m-%d %H:%M:%2S"
34 #define ISO8601_LOCAL_TIME_FORMAT_3 "%Y-%m-%d %H:%M:%3S"
35 #define ISO8601_LOCAL_TIME_FORMAT_4 "%Y-%m-%d %H:%M:%4S"
36 #define ISO8601_LOCAL_TIME_FORMAT_5 "%Y-%m-%d %H:%M:%5S"
37 #define ISO8601_LOCAL_TIME_FORMAT_6 "%Y-%m-%d %H:%M:%6S"
38 #define ISO8601_LOCAL_TIME_FORMAT_7 "%Y-%m-%d %H:%M:%7S"
39 #define ISO8601_LOCAL_TIME_FORMAT_8 "%Y-%m-%d %H:%M:%8S"
40 #define ISO8601_LOCAL_TIME_FORMAT_9 "%Y-%m-%d %H:%M:%9S"
41 
42 // ----------------------------------------------------------------
43 typedef mv_t mv_variadic_func_t(mv_t* pvals, int nvals);
44 typedef mv_t mv_zary_func_t();
45 typedef mv_t mv_unary_func_t(mv_t* pval1);
46 typedef mv_t mv_binary_func_t(mv_t* pval1, mv_t* pval2);
47 typedef mv_t mv_binary_arg3_capture_func_t(mv_t* pval1, mv_t* pval2, string_array_t** ppregex_captures);
48 typedef mv_t mv_binary_arg2_regex_func_t(mv_t* pval1, regex_t* pregex, string_builder_t* psb, string_array_t** ppregex_captures);
49 typedef mv_t mv_binary_arg2_regextract_func_t(mv_t* pval1, regex_t* pregex);
50 typedef mv_t mv_ternary_func_t(mv_t* pval1, mv_t* pval2, mv_t* pval3);
51 typedef mv_t mv_ternary_arg2_regex_func_t(mv_t* pval1, regex_t* pregex, string_builder_t* psb, mv_t* pval3);
52 typedef mv_t mv_ternary_arg2_regextract_func_t(mv_t* pval1, regex_t* pregex, mv_t* pval3);
53 
54 // ----------------------------------------------------------------
// Logical NOT of pval1's boolean payload, returned via mv_from_bool().
// The payload is read directly; pval1's type tag is not inspected here.
static inline mv_t b_b_not_func(mv_t* pval1) {
	int negated = !pval1->u.boolv;
	return mv_from_bool(negated);
}
58 
// Logical OR of the two boolean payloads, returned via mv_from_bool().
// Payloads are read directly; type tags are not inspected here.
static inline mv_t b_bb_or_func(mv_t* pval1, mv_t* pval2) {
	int either = pval1->u.boolv || pval2->u.boolv;
	return mv_from_bool(either);
}
// Logical AND of the two boolean payloads, returned via mv_from_bool().
// Payloads are read directly; type tags are not inspected here.
static inline mv_t b_bb_and_func(mv_t* pval1, mv_t* pval2) {
	int both = pval1->u.boolv && pval2->u.boolv;
	return mv_from_bool(both);
}
// Exclusive OR of the two boolean payloads, returned via mv_from_bool().
// This is a bitwise ^ on the stored values, so it is a logical XOR only when
// both payloads are normalized to 0/1 -- presumably guaranteed by whoever
// builds the mlrvals; confirm against mlrval.h.
static inline mv_t b_bb_xor_func(mv_t* pval1, mv_t* pval2) {
	int differ = pval1->u.boolv ^ pval2->u.boolv;
	return mv_from_bool(differ);
}
68 
69 // ----------------------------------------------------------------
f_z_urand_func()70 static inline mv_t f_z_urand_func() {
71 	return mv_from_float(get_mtrand_double()); // mtrand.h
72 }
i_z_urand32_func()73 static inline mv_t i_z_urand32_func() {
74 	return mv_from_float(get_mtrand_int32()); // mtrand.h
75 }
f_z_systime_func()76 static inline mv_t f_z_systime_func() {
77 	return mv_from_float(get_systime()); // mlrutil.h
78 }
79 
80 // ----------------------------------------------------------------
// Applies acos() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_acos_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(acos(x));
}
// Applies acosh() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_acosh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(acosh(x));
}
// Applies asin() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_asin_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(asin(x));
}
// Applies asinh() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_asinh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(asinh(x));
}
// Applies atan() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_atan_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(atan(x));
}
// Applies atanh() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_atanh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(atanh(x));
}
// Applies cbrt() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_cbrt_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(cbrt(x));
}
// Applies cos() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_cos_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(cos(x));
}
// Applies cosh() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_cosh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(cosh(x));
}
// Applies erf() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_erf_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(erf(x));
}
// Applies erfc() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_erfc_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(erfc(x));
}
// Applies exp() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_exp_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(exp(x));
}
// Applies expm1() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_expm1_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(expm1(x));
}
// Applies invqnorm() to pval1's float payload and returns mv_from_float() of
// the result. invqnorm() is not a standard-library function; it is declared
// outside this file (presumably in mlrmath.h -- confirm). The payload is read
// directly; the type tag is not inspected here.
static inline mv_t f_f_invqnorm_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(invqnorm(x));
}
// Applies log10() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_log10_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(log10(x));
}
// Applies log1p() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_log1p_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(log1p(x));
}
// Applies log() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_log_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(log(x));
}
// Applies qnorm() to pval1's float payload and returns mv_from_float() of the
// result. qnorm() is not a standard-library function; it is declared outside
// this file (presumably in mlrmath.h -- confirm). The payload is read
// directly; the type tag is not inspected here.
static inline mv_t f_f_qnorm_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(qnorm(x));
}
// Applies sin() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_sin_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(sin(x));
}
// Applies sinh() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_sinh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(sinh(x));
}
// Applies sqrt() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_sqrt_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(sqrt(x));
}
// Applies tan() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_tan_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(tan(x));
}
// Applies tanh() to pval1's float payload and returns mv_from_float() of the
// result. The payload is read directly; the type tag is not inspected here.
static inline mv_t f_f_tanh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(tanh(x));
}
104 
// Raises pval1's float payload to the power given by pval2's float payload
// using pow(), and returns mv_from_float() of the result. Payloads are read
// directly; type tags are not inspected here.
static inline mv_t f_ff_pow_func(mv_t* pval1, mv_t* pval2) {
	double base     = pval1->u.fltv;
	double exponent = pval2->u.fltv;
	return mv_from_float(pow(base, exponent));
}
108 
// The following arithmetic functions overflow from 64-bit ints to double. This is for general use.
110 mv_t x_xx_plus_func(mv_t* pval1, mv_t* pval2);
111 mv_t x_xx_minus_func(mv_t* pval1, mv_t* pval2);
112 mv_t x_xx_times_func(mv_t* pval1, mv_t* pval2);
113 mv_t x_xx_divide_func(mv_t* pval1, mv_t* pval2);
114 mv_t x_xx_int_divide_func(mv_t* pval1, mv_t* pval2);
115 
// The following arithmetic functions intentionally overflow 64-bit ints. This
// is for use-cases where people want that, e.g. 64-bit integer math.
118 mv_t x_xx_oplus_func(mv_t* pval1, mv_t* pval2);
119 mv_t x_xx_ominus_func(mv_t* pval1, mv_t* pval2);
120 mv_t x_xx_otimes_func(mv_t* pval1, mv_t* pval2);
121 mv_t x_xx_odivide_func(mv_t* pval1, mv_t* pval2);
122 mv_t x_xx_int_odivide_func(mv_t* pval1, mv_t* pval2);
123 
124 mv_t x_xx_mod_func(mv_t* pval1, mv_t* pval2);
125 mv_t x_x_upos_func(mv_t* pval1);
126 mv_t x_x_uneg_func(mv_t* pval1);
127 
128 // Bitwise
129 mv_t x_xx_bxor_func(mv_t* pval1, mv_t* pval2);
130 mv_t x_xx_band_func(mv_t* pval1, mv_t* pval2);
131 mv_t x_xx_bor_func(mv_t* pval1, mv_t* pval2);
132 
133 mv_t x_x_abs_func(mv_t* pval1);
134 mv_t x_x_ceil_func(mv_t* pval1);
135 mv_t x_x_floor_func(mv_t* pval1);
136 mv_t x_x_round_func(mv_t* pval1);
137 mv_t x_x_sgn_func(mv_t* pval1);
138 
139 mv_t variadic_min_func(mv_t* pvals, int nvals);
140 mv_t variadic_max_func(mv_t* pvals, int nvals);
141 
142 mv_t x_xx_min_func(mv_t* pval1, mv_t* pval2);
143 mv_t x_xx_max_func(mv_t* pval1, mv_t* pval2);
144 mv_t x_xx_roundm_func(mv_t* pval1, mv_t* pval2);
145 
146 mv_t i_x_int_func(mv_t* pval1);
147 mv_t f_x_float_func(mv_t* pval1);
148 mv_t b_x_boolean_func(mv_t* pval1);
149 mv_t s_x_string_func(mv_t* pval1);
150 mv_t s_sii_substr_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
151 mv_t s_x_hexfmt_func(mv_t* pval1);
152 mv_t s_xs_fmtnum_func(mv_t* pval1, mv_t* pval2);
153 
154 // ----------------------------------------------------------------
// Two-argument arctangent: calls atan2() with pval1's float payload as the
// first argument and pval2's as the second, and returns mv_from_float() of
// the result. Payloads are read directly; type tags are not inspected here.
static inline mv_t f_ff_atan2_func(mv_t* pval1, mv_t* pval2) {
	double y = pval1->u.fltv;
	double x = pval2->u.fltv;
	return mv_from_float(atan2(y, x));
}
158 
// Evaluates the logistic curve 1 / (1 + exp(-m*x - b)), where x, m and b are
// the float payloads of pval1, pval2 and pval3 respectively, and returns
// mv_from_float() of the result. Payloads are read directly; type tags are
// not inspected here.
static inline mv_t f_fff_logifit_func(mv_t* pval1, mv_t* pval2, mv_t* pval3) {
	double input     = pval1->u.fltv;
	double slope     = pval2->u.fltv;
	double intercept = pval3->u.fltv;

	double exponent    = -slope*input-intercept;
	double denominator = 1.0 + exp(exponent);
	return mv_from_float(1.0 / denominator);
}
165 
// Returns an integer mlrval drawn from the closed range whose endpoints are
// the int payloads of pval1 and pval2; the endpoints may be given in either
// order. Payloads are read directly; type tags are not inspected here.
//
// The random offset is computed as a non-negative quantity and then added to
// the lower endpoint in integer arithmetic. Forming lo + width * u in double
// and converting back to integer truncates toward zero, which for a negative
// or zero-crossing range shifts the results upward: e.g. for -5..-3 the
// outcomes would be -4, -3 and -2, the last of which is outside the range.
//
// Unsigned arithmetic is used for the width so that neither "hi + 1" nor
// "hi - lo" can overflow a signed 64-bit int when the endpoints are extreme.
static inline mv_t i_ii_urandint_func(mv_t* pval1, mv_t* pval2) {
	long long a = pval1->u.intv;
	long long b = pval2->u.intv;
	long long lo = (a <= b) ? a : b;
	long long hi = (a <= b) ? b : a;

	// Count of admissible values, minus one.
	unsigned long long span = (unsigned long long)hi - (unsigned long long)lo;

	// Assumes get_mtrand_double() returns a value in [0, 1) -- TODO confirm
	// in mtrand.h.
	double scaled = ((double)span + 1.0) * get_mtrand_double();

	// Anything at or beyond span maps to the top of the range. This is exact
	// whenever span is representable as a double, and it also keeps the
	// float-to-integer conversion in range for very wide spans.
	unsigned long long offset = (scaled < (double)span)
		? (unsigned long long)scaled
		: span;

	// lo + offset lies in [lo, hi] mathematically; the sum is formed modulo
	// 2^64 and converted back, which relies on two's-complement conversion
	// (implementation-defined in C11, but what mainstream compilers provide).
	return mv_from_int((long long)((unsigned long long)lo + offset));
}
180 
// Returns lo + (hi - lo) * get_mtrand_double() as a float mlrval, where lo and
// hi are the float payloads of pval1 and pval2. The endpoints are used as
// given (they are not reordered). Payloads are read directly; type tags are
// not inspected here.
static inline mv_t f_ff_urandrange_func(mv_t* pval1, mv_t* pval2) {
	double lower = pval1->u.fltv;
	double upper = pval2->u.fltv;
	return mv_from_float(lower + (upper - lower) * get_mtrand_double());
}
187 
// Left-shifts pval1's int payload by pval2's int payload and returns
// mv_from_int() of the result. Payloads are read directly; type tags are not
// inspected here.
//
// In C, left-shifting a negative signed value, or shifting by a count that is
// negative or not less than the operand width, is undefined behavior. To keep
// the result well-defined:
//  * the shift is done on the unsigned bit pattern and converted back, which
//    yields the usual two's-complement result;
//  * only the low six bits of the count are used, which is how 64-bit shift
//    instructions on common hardware treat an oversized count.
static inline mv_t i_ii_bitwise_lsh_func(mv_t* pval1, mv_t* pval2) {
	unsigned long long bits  = (unsigned long long)pval1->u.intv;
	unsigned           count = (unsigned)(pval2->u.intv & 63);
	return mv_from_int((long long)(bits << count));
}
// Right-shifts pval1's int payload by pval2's int payload and returns
// mv_from_int() of the result. Payloads are read directly; type tags are not
// inspected here.
//
// Shifting by a count that is negative or not less than the operand width is
// undefined behavior in C, so only the low six bits of the count are used,
// which is how 64-bit shift instructions on common hardware treat an
// oversized count. The shift itself stays on the signed value: for a negative
// left operand the result is implementation-defined (sign-propagating on
// mainstream compilers), exactly as before.
static inline mv_t i_ii_bitwise_rsh_func(mv_t* pval1, mv_t* pval2) {
	unsigned count = (unsigned)(pval2->u.intv & 63);
	return mv_from_int(pval1->u.intv >> count);
}
// Bitwise complement of pval1's int payload, returned via mv_from_int().
// The payload is read directly; the type tag is not inspected here.
static inline mv_t i_i_bitwise_not_func(mv_t* pval1) {
	long long flipped = ~pval1->u.intv;
	return mv_from_int(flipped);
}
197 mv_t i_i_bitcount_func(mv_t* pval1);
198 
199 mv_t i_iii_modadd_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
200 mv_t i_iii_modsub_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
201 mv_t i_iii_modmul_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
202 mv_t i_iii_modexp_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
203 
204 // ----------------------------------------------------------------
205 mv_t i_s_strlen_func(mv_t* pval1);
206 mv_t s_x_typeof_func(mv_t* pval1);
207 mv_t s_s_tolower_func(mv_t* pval1);
208 mv_t s_s_toupper_func(mv_t* pval1);
209 mv_t s_s_capitalize_func(mv_t* pval1);
210 mv_t s_s_lstrip_func(mv_t* pval1);
211 mv_t s_s_rstrip_func(mv_t* pval1);
212 mv_t s_s_strip_func(mv_t* pval1);
213 mv_t s_s_collapse_whitespace_func(mv_t* pval1);
214 mv_t s_s_clean_whitespace_func(mv_t* pval1);
215 
216 mv_t s_s_system_func(mv_t* pval1);
217 
218 mv_t s_si_truncate_func(mv_t* pval1, mv_t* pval2);
219 
220 mv_t s_xx_dot_func(mv_t* pval1, mv_t* pval2);
221 
222 mv_t sub_no_precomp_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
223 mv_t sub_precomp_func(mv_t* pval1, regex_t* pregex, string_builder_t* psb, mv_t* pval3);
224 mv_t gsub_no_precomp_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
225 mv_t gsub_precomp_func(mv_t* pval1, regex_t* pregex, string_builder_t* psb, mv_t* pval3);
226 mv_t regextract_no_precomp_func(mv_t* pval1, mv_t* pval2);
227 mv_t regextract_precomp_func(mv_t* pval1, regex_t* pregex);
228 mv_t regextract_or_else_no_precomp_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
229 mv_t regextract_or_else_precomp_func(mv_t* pval1, regex_t* pregex, mv_t* pval3);
230 // String-substitution with no regexes or special characters.
231 mv_t s_sss_ssub_func(mv_t* pstring, mv_t* pold, mv_t* pnew);
232 
233 // ----------------------------------------------------------------
234 mv_t s_x_sec2gmt_func(mv_t* pval1);
235 mv_t s_xi_sec2gmt_func(mv_t* pval1, mv_t* pval2);
236 mv_t s_x_sec2gmtdate_func(mv_t* pval1);
237 
238 mv_t s_x_sec2localtime_func(mv_t* pval1);
239 mv_t s_xi_sec2localtime_func(mv_t* pval1, mv_t* pval2);
240 mv_t s_x_sec2localdate_func(mv_t* pval1);
241 
242 mv_t i_s_gmt2sec_func(mv_t* pval1);
243 mv_t i_s_localtime2sec_func(mv_t* pval1);
244 
245 mv_t s_ns_strftime_func(mv_t* pval1, mv_t* pval2);
246 mv_t s_ns_strftime_local_func(mv_t* pval1, mv_t* pval2);
247 
248 mv_t i_ss_strptime_func(mv_t* pval1, mv_t* pval2);
249 mv_t i_ss_strptime_local_func(mv_t* pval1, mv_t* pval2);
250 
251 mv_t s_i_sec2hms_func(mv_t* pval1);
252 mv_t s_f_fsec2hms_func(mv_t* pval1);
253 mv_t s_i_sec2dhms_func(mv_t* pval1);
254 mv_t s_f_fsec2dhms_func(mv_t* pval1);
255 mv_t i_s_hms2sec_func(mv_t* pval1);
256 mv_t f_s_hms2fsec_func(mv_t* pval1);
257 mv_t i_s_dhms2sec_func(mv_t* pval1);
258 mv_t f_s_dhms2fsec_func(mv_t* pval1);
259 
260 mv_t time_string_from_seconds(mv_t* psec, char* format,
261 	timezone_handling_t timezone_handling);
262 
263 // ----------------------------------------------------------------
264 // arg2 evaluates to string via compound expression; regexes compiled on each call
265 mv_t matches_no_precomp_func(mv_t* pval1, mv_t* pval2, string_array_t** ppregex_captures);
266 mv_t does_not_match_no_precomp_func(mv_t* pval1, mv_t* pval2, string_array_t** ppregex_captures);
267 // arg2 is a string, compiled to regex only once at alloc time
268 mv_t matches_precomp_func(mv_t* pval1, regex_t* pregex, string_builder_t* psb, string_array_t** ppregex_captures);
269 mv_t does_not_match_precomp_func(mv_t* pval1, regex_t* pregex, string_builder_t* psb, string_array_t** ppregex_captures);
270 
271 // For filter/put DSL:
272 mv_t eq_op_func(mv_t* pval1, mv_t* pval2);
273 mv_t ne_op_func(mv_t* pval1, mv_t* pval2);
274 mv_t gt_op_func(mv_t* pval1, mv_t* pval2);
275 mv_t ge_op_func(mv_t* pval1, mv_t* pval2);
276 mv_t lt_op_func(mv_t* pval1, mv_t* pval2);
277 mv_t le_op_func(mv_t* pval1, mv_t* pval2);
278 
279 // Assumes inputs are MT_STRING or MT_INT. Nominally intended for mlhmmv which uses only string/int mlrvals.
280 int mv_equals_si(mv_t* pa, mv_t* pb);
281 
282 // For non-DSL comparison of mlrvals:
283 int mv_i_nn_eq(mv_t* pval1, mv_t* pval2);
284 int mv_i_nn_ne(mv_t* pval1, mv_t* pval2);
285 int mv_i_nn_gt(mv_t* pval1, mv_t* pval2);
286 int mv_i_nn_ge(mv_t* pval1, mv_t* pval2);
287 int mv_i_nn_lt(mv_t* pval1, mv_t* pval2);
288 int mv_i_nn_le(mv_t* pval1, mv_t* pval2);
289 
290 // For unit-test keystroke-saving:
291 int mveq(mv_t* pval1, mv_t* pval2);
292 int mvne(mv_t* pval1, mv_t* pval2);
293 int mveqcopy(mv_t* pval1, mv_t* pval2);
294 int mvnecopy(mv_t* pval1, mv_t* pval2);
295 
296 // ----------------------------------------------------------------
297 // For qsort of numeric mlrvals.
298 int mv_nn_comparator(const void* pva, const void* pvb);
299 
300 // For qsort of arbitrary mlrvals. Sort rules:
301 // * Across types:
302 //   NUMERICS < BOOL < STRINGS < ERROR < ABSENT
303 // * Within types:
304 //   o numeric compares on numbers
305 //   o false < true
306 //   o string compares on strings
307 //   o error == error (this is a singleton type)
308 //   o absent == absent (this is a singleton type)
309 int mv_xx_comparator(const void* pva, const void* pvb);
310 
311 int mlr_bsearch_mv_n_for_insert(mv_t* array, int size, mv_t* pvalue);
312 
313 #endif // MVFUNCS_H
314