1 #ifndef MVFUNCS_H
2 #define MVFUNCS_H
3
4 // ================================================================
5 // Functions involving mlrvals: primarily for the DSL but also for
6 // stats1/stats2, etc.
7 // ================================================================
8
9 #include "../lib/mlrmath.h"
10 #include "../lib/mlrutil.h"
11 #include "../lib/mlrdatetime.h"
12 #include "../lib/mtrand.h"
13 #include "../lib/string_builder.h"
14 #include "../lib/string_array.h"
15 #include "../lib/mlrval.h"
16
// NOTE(review): presumably the initial allocation length handed to string builders
// (see string_builder.h). It is not referenced anywhere else in this header;
// confirm the meaning against the .c files that use it.
17 #define MV_SB_ALLOC_LENGTH 32
18
// strftime-style format strings for timestamps with a literal "T" separator and a
// literal "Z" suffix, plus a date-only format.
// NOTE(review): "%1S" through "%9S" are not standard strftime conversions.
// Presumably the project's own formatter treats the digit as the number of
// fractional-second decimal places -- confirm against the formatter that consumes
// these strings.
19 #define ISO8601_TIME_FORMAT "%Y-%m-%dT%H:%M:%SZ"
20 #define ISO8601_TIME_FORMAT_1 "%Y-%m-%dT%H:%M:%1SZ"
21 #define ISO8601_TIME_FORMAT_2 "%Y-%m-%dT%H:%M:%2SZ"
22 #define ISO8601_TIME_FORMAT_3 "%Y-%m-%dT%H:%M:%3SZ"
23 #define ISO8601_TIME_FORMAT_4 "%Y-%m-%dT%H:%M:%4SZ"
24 #define ISO8601_TIME_FORMAT_5 "%Y-%m-%dT%H:%M:%5SZ"
25 #define ISO8601_TIME_FORMAT_6 "%Y-%m-%dT%H:%M:%6SZ"
26 #define ISO8601_TIME_FORMAT_7 "%Y-%m-%dT%H:%M:%7SZ"
27 #define ISO8601_TIME_FORMAT_8 "%Y-%m-%dT%H:%M:%8SZ"
28 #define ISO8601_TIME_FORMAT_9 "%Y-%m-%dT%H:%M:%9SZ"
29 #define ISO8601_DATE_FORMAT "%Y-%m-%d"
30
// Local-time counterparts of the format strings: a space separates date and time
// and there is no "Z" suffix.
// NOTE(review): as with the "Z" variants, "%1S" through "%9S" are non-standard;
// presumably the digit selects the number of fractional-second decimal places --
// confirm against the formatter that consumes these strings.
31 #define ISO8601_LOCAL_TIME_FORMAT "%Y-%m-%d %H:%M:%S"
32 #define ISO8601_LOCAL_TIME_FORMAT_1 "%Y-%m-%d %H:%M:%1S"
33 #define ISO8601_LOCAL_TIME_FORMAT_2 "%Y-%m-%d %H:%M:%2S"
34 #define ISO8601_LOCAL_TIME_FORMAT_3 "%Y-%m-%d %H:%M:%3S"
35 #define ISO8601_LOCAL_TIME_FORMAT_4 "%Y-%m-%d %H:%M:%4S"
36 #define ISO8601_LOCAL_TIME_FORMAT_5 "%Y-%m-%d %H:%M:%5S"
37 #define ISO8601_LOCAL_TIME_FORMAT_6 "%Y-%m-%d %H:%M:%6S"
38 #define ISO8601_LOCAL_TIME_FORMAT_7 "%Y-%m-%d %H:%M:%7S"
39 #define ISO8601_LOCAL_TIME_FORMAT_8 "%Y-%m-%d %H:%M:%8S"
40 #define ISO8601_LOCAL_TIME_FORMAT_9 "%Y-%m-%d %H:%M:%9S"
41
42 // ----------------------------------------------------------------
43 typedef mv_t mv_variadic_func_t(mv_t* pvals, int nvals);
44 typedef mv_t mv_zary_func_t();
45 typedef mv_t mv_unary_func_t(mv_t* pval1);
46 typedef mv_t mv_binary_func_t(mv_t* pval1, mv_t* pval2);
47 typedef mv_t mv_binary_arg3_capture_func_t(mv_t* pval1, mv_t* pval2, string_array_t** ppregex_captures);
48 typedef mv_t mv_binary_arg2_regex_func_t(mv_t* pval1, regex_t* pregex, string_builder_t* psb, string_array_t** ppregex_captures);
49 typedef mv_t mv_binary_arg2_regextract_func_t(mv_t* pval1, regex_t* pregex);
50 typedef mv_t mv_ternary_func_t(mv_t* pval1, mv_t* pval2, mv_t* pval3);
51 typedef mv_t mv_ternary_arg2_regex_func_t(mv_t* pval1, regex_t* pregex, string_builder_t* psb, mv_t* pval3);
52 typedef mv_t mv_ternary_arg2_regextract_func_t(mv_t* pval1, regex_t* pregex, mv_t* pval3);
53
54 // ----------------------------------------------------------------
// Logical NOT: yields mv_from_bool(1) when the boolean payload of *pval1 is zero
// and mv_from_bool(0) otherwise. The payload is read without a type check here.
static inline mv_t b_b_not_func(mv_t* pval1) {
	return mv_from_bool(pval1->u.boolv ? 0 : 1);
}
58
// Logical OR of the boolean payloads of *pval1 and *pval2, wrapped via
// mv_from_bool(). *pval2 is only read when the first payload is zero.
static inline mv_t b_bb_or_func(mv_t* pval1, mv_t* pval2) {
	if (pval1->u.boolv) {
		return mv_from_bool(1);
	}
	return mv_from_bool(pval2->u.boolv ? 1 : 0);
}
// Logical AND of the boolean payloads of *pval1 and *pval2, wrapped via
// mv_from_bool(). *pval2 is only read when the first payload is non-zero.
static inline mv_t b_bb_and_func(mv_t* pval1, mv_t* pval2) {
	if (!pval1->u.boolv) {
		return mv_from_bool(0);
	}
	return mv_from_bool(pval2->u.boolv ? 1 : 0);
}
// Exclusive OR of the two boolean payloads, wrapped via mv_from_bool().
// The operator is the bitwise ^, which matches logical XOR as long as both
// payloads hold only 0 or 1.
// NOTE(review): confirm that boolean mlrvals never carry other non-zero values.
static inline mv_t b_bb_xor_func(mv_t* pval1, mv_t* pval2) {
	mv_t output = mv_from_bool(pval1->u.boolv ^ pval2->u.boolv);
	return output;
}
68
69 // ----------------------------------------------------------------
f_z_urand_func()70 static inline mv_t f_z_urand_func() {
71 return mv_from_float(get_mtrand_double()); // mtrand.h
72 }
i_z_urand32_func()73 static inline mv_t i_z_urand32_func() {
74 return mv_from_float(get_mtrand_int32()); // mtrand.h
75 }
f_z_systime_func()76 static inline mv_t f_z_systime_func() {
77 return mv_from_float(get_systime()); // mlrutil.h
78 }
79
80 // ----------------------------------------------------------------
// Applies acos() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_acos_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(acos(x));
}
// Applies acosh() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_acosh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(acosh(x));
}
// Applies asin() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_asin_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(asin(x));
}
// Applies asinh() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_asinh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(asinh(x));
}
// Applies atan() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_atan_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(atan(x));
}
// Applies atanh() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_atanh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(atanh(x));
}
// Applies cbrt() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_cbrt_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(cbrt(x));
}
// Applies cos() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_cos_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(cos(x));
}
// Applies cosh() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_cosh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(cosh(x));
}
// Applies erf() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_erf_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(erf(x));
}
// Applies erfc() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_erfc_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(erfc(x));
}
// Applies exp() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_exp_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(exp(x));
}
// Applies expm1() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_expm1_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(expm1(x));
}
// Applies invqnorm() to the float payload of *pval1; result wrapped via
// mv_from_float(). invqnorm() is not a libm function.
// NOTE(review): presumably it is declared in mlrmath.h -- confirm.
static inline mv_t f_f_invqnorm_func(mv_t* pval1) {
	double p = pval1->u.fltv;
	return mv_from_float(invqnorm(p));
}
// Applies log10() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_log10_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(log10(x));
}
// Applies log1p() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_log1p_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(log1p(x));
}
// Applies log() (natural logarithm) to the float payload of *pval1; result
// wrapped via mv_from_float().
static inline mv_t f_f_log_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(log(x));
}
// Applies qnorm() to the float payload of *pval1; result wrapped via
// mv_from_float(). qnorm() is not a libm function.
// NOTE(review): presumably it is declared in mlrmath.h -- confirm.
static inline mv_t f_f_qnorm_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(qnorm(x));
}
// Applies sin() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_sin_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(sin(x));
}
// Applies sinh() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_sinh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(sinh(x));
}
// Applies sqrt() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_sqrt_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(sqrt(x));
}
// Applies tan() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_tan_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(tan(x));
}
// Applies tanh() to the float payload of *pval1; result wrapped via mv_from_float().
static inline mv_t f_f_tanh_func(mv_t* pval1) {
	double x = pval1->u.fltv;
	return mv_from_float(tanh(x));
}
104
// Raises the float payload of *pval1 to the power given by the float payload of
// *pval2 using pow(); result wrapped via mv_from_float().
static inline mv_t f_ff_pow_func(mv_t* pval1, mv_t* pval2) {
	double base = pval1->u.fltv;
	double exponent = pval2->u.fltv;
	return mv_from_float(pow(base, exponent));
}
108
// Naming convention, as far as the inline definitions in this header show it:
// the letters before the first underscore give the return type and the next group
// gives the argument types (f = float payload, i = int payload, b = boolean
// payload, z = no arguments).
// NOTE(review): "x" presumably means "type decided at run time" -- confirm.
109 // These four overflow from 64-bit ints to double. This is for general use.
// NOTE(review): five functions are declared below although the comment says four;
// confirm that x_xx_int_divide_func follows the same overflow-to-double rule.
110 mv_t x_xx_plus_func(mv_t* pval1, mv_t* pval2);
111 mv_t x_xx_minus_func(mv_t* pval1, mv_t* pval2);
112 mv_t x_xx_times_func(mv_t* pval1, mv_t* pval2);
113 mv_t x_xx_divide_func(mv_t* pval1, mv_t* pval2);
114 mv_t x_xx_int_divide_func(mv_t* pval1, mv_t* pval2);
115
116 // These four intentionally overflow 64-bit ints. This is for use-cases where
117 // people want that, e.g. 64-bit integer math.
// NOTE(review): five functions are declared below although the comment says four;
// confirm that x_xx_int_odivide_func follows the same integer-overflow rule.
118 mv_t x_xx_oplus_func(mv_t* pval1, mv_t* pval2);
119 mv_t x_xx_ominus_func(mv_t* pval1, mv_t* pval2);
120 mv_t x_xx_otimes_func(mv_t* pval1, mv_t* pval2);
121 mv_t x_xx_odivide_func(mv_t* pval1, mv_t* pval2);
122 mv_t x_xx_int_odivide_func(mv_t* pval1, mv_t* pval2);
123
// Declarations only; the implementations are not part of this header.
// NOTE(review): judging by the names, these are modulus, unary plus and unary
// minus. The sign convention of the modulus for negative operands is not visible
// here; confirm in the implementation.
124 mv_t x_xx_mod_func(mv_t* pval1, mv_t* pval2);
125 mv_t x_x_upos_func(mv_t* pval1);
126 mv_t x_x_uneg_func(mv_t* pval1);
127
128 // Bitwise
// NOTE(review): presumably XOR / AND / OR; behaviour for non-integer inputs is
// not visible here.
129 mv_t x_xx_bxor_func(mv_t* pval1, mv_t* pval2);
130 mv_t x_xx_band_func(mv_t* pval1, mv_t* pval2);
131 mv_t x_xx_bor_func(mv_t* pval1, mv_t* pval2);
132
// NOTE(review): names suggest absolute value, ceiling, floor, rounding and sign;
// whether the result keeps the input's int/float type is not visible here.
133 mv_t x_x_abs_func(mv_t* pval1);
134 mv_t x_x_ceil_func(mv_t* pval1);
135 mv_t x_x_floor_func(mv_t* pval1);
136 mv_t x_x_round_func(mv_t* pval1);
137 mv_t x_x_sgn_func(mv_t* pval1);
138
// Variadic min/max. The parameter list matches mv_variadic_func_t:
// (mv_t* pvals, int nvals).
// NOTE(review): the result for nvals == 0 and for mixed types is not visible here.
139 mv_t variadic_min_func(mv_t* pvals, int nvals);
140 mv_t variadic_max_func(mv_t* pvals, int nvals);
141
// Two-argument forms.
// NOTE(review): x_xx_roundm_func presumably rounds pval1 to a multiple of pval2
// -- confirm in the implementation.
142 mv_t x_xx_min_func(mv_t* pval1, mv_t* pval2);
143 mv_t x_xx_max_func(mv_t* pval1, mv_t* pval2);
144 mv_t x_xx_roundm_func(mv_t* pval1, mv_t* pval2);
145
// Type conversion and formatting; declarations only.
// NOTE(review): going by the prefixes, these return int / float / boolean /
// string mlrvals from an input of any type. Handling of unconvertible input is
// not visible here.
146 mv_t i_x_int_func(mv_t* pval1);
147 mv_t f_x_float_func(mv_t* pval1);
148 mv_t b_x_boolean_func(mv_t* pval1);
149 mv_t s_x_string_func(mv_t* pval1);
// NOTE(review): presumably substring of pval1 between the int indices pval2 and
// pval3; index base and inclusivity need confirming.
150 mv_t s_sii_substr_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
151 mv_t s_x_hexfmt_func(mv_t* pval1);
// NOTE(review): presumably formats the number pval1 with the format string pval2.
152 mv_t s_xs_fmtnum_func(mv_t* pval1, mv_t* pval2);
153
154 // ----------------------------------------------------------------
// Calls atan2() with the float payload of *pval1 as its first argument and the
// float payload of *pval2 as its second; result wrapped via mv_from_float().
static inline mv_t f_ff_atan2_func(mv_t* pval1, mv_t* pval2) {
	double y = pval1->u.fltv;
	double x = pval2->u.fltv;
	return mv_from_float(atan2(y, x));
}
158
// Evaluates the logistic curve 1 / (1 + exp(-m*x - b)), where x, m and b are the
// float payloads of *pval1, *pval2 and *pval3 respectively; result wrapped via
// mv_from_float().
static inline mv_t f_fff_logifit_func(mv_t* pval1, mv_t* pval2, mv_t* pval3) {
	double exponent = -pval2->u.fltv * pval1->u.fltv - pval3->u.fltv;
	double denominator = 1.0 + exp(exponent);
	return mv_from_float(1.0 / denominator);
}
165
// Returns a random int mlrval in the inclusive range spanned by the int payloads
// of *pval1 and *pval2, which may be given in either order. Randomness comes
// from get_mtrand_double().
//
// The random offset is truncated while it is still non-negative and is then added
// to the lower bound in integer arithmetic. Computing "lo + width * r" in double
// and truncating the sum instead rounds toward zero, so a negative range such as
// [-5, -1] would produce 0 and almost never -5, and large bounds would lose
// precision.
static inline mv_t i_ii_urandint_func(mv_t* pval1, mv_t* pval2) {
	long long a = pval1->u.intv;
	long long b = pval2->u.intv;
	long long lo = (a <= b) ? a : b;
	long long hi = (a <= b) ? b : a;

	// Largest admissible offset. The "+ 1" for the inclusive width is done in
	// double so that hi == LLONG_MAX cannot overflow. NOTE: hi - lo itself
	// still overflows if the bounds are more than LLONG_MAX apart.
	long long span = hi - lo;
	long long offset = (long long)(((double)span + 1.0) * get_mtrand_double());

	// Guards against floating-point rounding, and against a generator that can
	// return exactly 1.0.
	if (offset > span) {
		offset = span;
	}
	return mv_from_int(lo + offset);
}
180
// Returns lo + (hi - lo) * get_mtrand_double() wrapped via mv_from_float(), where
// lo and hi are the float payloads of *pval1 and *pval2. The bounds are not
// reordered or otherwise checked.
static inline mv_t f_ff_urandrange_func(mv_t* pval1, mv_t* pval2) {
	double lower = pval1->u.fltv;
	double span = pval2->u.fltv - lower;
	return mv_from_float(lower + span * get_mtrand_double());
}
187
// Left-shifts the int payload of *pval1 by the int payload of *pval2 and wraps
// the result via mv_from_int().
//
// In C, shifting by a negative count, by a count >= the operand width, or
// left-shifting a negative value is undefined behaviour. To stay well-defined
// the count is reduced to its low six bits (0..63, assuming 64-bit ints) and the
// shift is done on the unsigned representation. Inputs that were already valid
// produce the same result as a plain "<<".
static inline mv_t i_ii_bitwise_lsh_func(mv_t* pval1, mv_t* pval2) {
	unsigned long long bits = (unsigned long long)pval1->u.intv;
	int count = (int)(pval2->u.intv & 63);
	return mv_from_int((long long)(bits << count));
}
// Right-shifts the int payload of *pval1 by the int payload of *pval2 and wraps
// the result via mv_from_int().
//
// In C, shifting by a negative count or by a count >= the operand width is
// undefined behaviour, so the count is reduced to its low six bits (0..63,
// assuming 64-bit ints). The shift is still applied to the signed value, so the
// treatment of negative operands is unchanged (implementation-defined in C).
static inline mv_t i_ii_bitwise_rsh_func(mv_t* pval1, mv_t* pval2) {
	int count = (int)(pval2->u.intv & 63);
	return mv_from_int(pval1->u.intv >> count);
}
// Bitwise complement (~) of the int payload of *pval1, wrapped via mv_from_int().
static inline mv_t i_i_bitwise_not_func(mv_t* pval1) {
	long long bits = pval1->u.intv;
	return mv_from_int(~bits);
}
// Declarations only; the implementations are not part of this header.
// NOTE(review): presumably the number of set bits in the int payload of pval1.
197 mv_t i_i_bitcount_func(mv_t* pval1);
198
// NOTE(review): names suggest modular add / subtract / multiply / exponentiate
// with pval3 as the modulus. Behaviour for a zero or negative modulus is not
// visible here.
199 mv_t i_iii_modadd_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
200 mv_t i_iii_modsub_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
201 mv_t i_iii_modmul_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
202 mv_t i_iii_modexp_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
203
204 // ----------------------------------------------------------------
// String functions; declarations only.
// NOTE(review): whether these free or reuse the input's string storage cannot be
// told from the prototypes -- check the implementations before relying on it.
205 mv_t i_s_strlen_func(mv_t* pval1);
206 mv_t s_x_typeof_func(mv_t* pval1);
207 mv_t s_s_tolower_func(mv_t* pval1);
208 mv_t s_s_toupper_func(mv_t* pval1);
209 mv_t s_s_capitalize_func(mv_t* pval1);
210 mv_t s_s_lstrip_func(mv_t* pval1);
211 mv_t s_s_rstrip_func(mv_t* pval1);
212 mv_t s_s_strip_func(mv_t* pval1);
213 mv_t s_s_collapse_whitespace_func(mv_t* pval1);
214 mv_t s_s_clean_whitespace_func(mv_t* pval1);
215
// NOTE(review): presumably runs the string payload as an external command and
// returns its output. If so, callers must treat the argument as trusted input.
216 mv_t s_s_system_func(mv_t* pval1);
217
218 mv_t s_si_truncate_func(mv_t* pval1, mv_t* pval2);
219
// NOTE(review): presumably concatenation of the two arguments' string forms.
220 mv_t s_xx_dot_func(mv_t* pval1, mv_t* pval2);
221
// Regex substitution and extraction. Each operation comes in two forms: the
// "_no_precomp" form takes the regex as an mlrval (pval2), the "_precomp" form
// takes a regex_t* in its place. The precomp sub/gsub forms also take a
// string_builder_t*.
// NOTE(review): presumably sub replaces the first match and gsub every match,
// with pval3 as the replacement -- confirm in the implementation.
222 mv_t sub_no_precomp_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
223 mv_t sub_precomp_func(mv_t* pval1, regex_t* pregex, string_builder_t* psb, mv_t* pval3);
224 mv_t gsub_no_precomp_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
225 mv_t gsub_precomp_func(mv_t* pval1, regex_t* pregex, string_builder_t* psb, mv_t* pval3);
226 mv_t regextract_no_precomp_func(mv_t* pval1, mv_t* pval2);
227 mv_t regextract_precomp_func(mv_t* pval1, regex_t* pregex);
// NOTE(review): in the "_or_else" forms pval3 is presumably the value returned
// when nothing matches.
228 mv_t regextract_or_else_no_precomp_func(mv_t* pval1, mv_t* pval2, mv_t* pval3);
229 mv_t regextract_or_else_precomp_func(mv_t* pval1, regex_t* pregex, mv_t* pval3);
230 // String-substitution with no regexes or special characters.
231 mv_t s_sss_ssub_func(mv_t* pstring, mv_t* pold, mv_t* pnew);
232
233 // ----------------------------------------------------------------
// Time formatting and parsing; declarations only.
// NOTE(review): names suggest conversion from seconds to GMT time/date strings.
// The two-argument form presumably takes the number of fractional-second digits
// as pval2 (compare the ISO8601_TIME_FORMAT_1..9 macros in this header) -- confirm.
234 mv_t s_x_sec2gmt_func(mv_t* pval1);
235 mv_t s_xi_sec2gmt_func(mv_t* pval1, mv_t* pval2);
236 mv_t s_x_sec2gmtdate_func(mv_t* pval1);
237
// Local-time counterparts of the three functions above.
238 mv_t s_x_sec2localtime_func(mv_t* pval1);
239 mv_t s_xi_sec2localtime_func(mv_t* pval1, mv_t* pval2);
240 mv_t s_x_sec2localdate_func(mv_t* pval1);
241
// NOTE(review): presumably the reverse direction, time string to integer seconds.
242 mv_t i_s_gmt2sec_func(mv_t* pval1);
243 mv_t i_s_localtime2sec_func(mv_t* pval1);
244
// NOTE(review): presumably pval1 is the numeric time and pval2 the format string.
245 mv_t s_ns_strftime_func(mv_t* pval1, mv_t* pval2);
246 mv_t s_ns_strftime_local_func(mv_t* pval1, mv_t* pval2);
247
// NOTE(review): presumably pval1 is the input string and pval2 the format string.
248 mv_t i_ss_strptime_func(mv_t* pval1, mv_t* pval2);
249 mv_t i_ss_strptime_local_func(mv_t* pval1, mv_t* pval2);
250
// Conversions between seconds and h/m/s or d/h/m/s strings, in int ("sec") and
// float ("fsec") flavours, going by the prefixes.
// NOTE(review): the exact string layouts and the handling of negative values are
// not visible here.
251 mv_t s_i_sec2hms_func(mv_t* pval1);
252 mv_t s_f_fsec2hms_func(mv_t* pval1);
253 mv_t s_i_sec2dhms_func(mv_t* pval1);
254 mv_t s_f_fsec2dhms_func(mv_t* pval1);
255 mv_t i_s_hms2sec_func(mv_t* pval1);
256 mv_t f_s_hms2fsec_func(mv_t* pval1);
257 mv_t i_s_dhms2sec_func(mv_t* pval1);
258 mv_t f_s_dhms2fsec_func(mv_t* pval1);
259
// Takes a seconds mlrval, a format string and a timezone_handling_t, and returns
// an mlrval.
// NOTE(review): presumably the shared helper behind the sec2gmt / sec2localtime /
// strftime functions -- confirm.
260 mv_t time_string_from_seconds(mv_t* psec, char* format,
261 timezone_handling_t timezone_handling);
262
263 // ----------------------------------------------------------------
// Regex match / non-match operators. All four take a string_array_t** for regex
// captures.
// NOTE(review): whether the captures array is allocated, overwritten or left
// untouched on a failed match is not visible here.
264 // arg2 evaluates to string via compound expression; regexes compiled on each call
265 mv_t matches_no_precomp_func(mv_t* pval1, mv_t* pval2, string_array_t** ppregex_captures);
266 mv_t does_not_match_no_precomp_func(mv_t* pval1, mv_t* pval2, string_array_t** ppregex_captures);
267 // arg2 is a string, compiled to regex only once at alloc time
268 mv_t matches_precomp_func(mv_t* pval1, regex_t* pregex, string_builder_t* psb, string_array_t** ppregex_captures);
269 mv_t does_not_match_precomp_func(mv_t* pval1, regex_t* pregex, string_builder_t* psb, string_array_t** ppregex_captures);
270
271 // For filter/put DSL:
// Comparison operators ==, !=, >, >=, <, <= going by the names; each returns an
// mlrval.
// NOTE(review): presumably a boolean mlrval, with type-dependent comparison rules
// defined in the implementation.
272 mv_t eq_op_func(mv_t* pval1, mv_t* pval2);
273 mv_t ne_op_func(mv_t* pval1, mv_t* pval2);
274 mv_t gt_op_func(mv_t* pval1, mv_t* pval2);
275 mv_t ge_op_func(mv_t* pval1, mv_t* pval2);
276 mv_t lt_op_func(mv_t* pval1, mv_t* pval2);
277 mv_t le_op_func(mv_t* pval1, mv_t* pval2);
278
279 // Assumes inputs are MT_STRING or MT_INT. Nominally intended for mlhmmv which uses only string/int mlrvals.
// Returns a plain C int.
// NOTE(review): presumably non-zero when the two values are equal.
280 int mv_equals_si(mv_t* pa, mv_t* pb);
281
282 // For non-DSL comparison of mlrvals:
// These return a plain C int rather than an mlrval.
// NOTE(review): "nn" presumably means both arguments are numeric, and the result
// is presumably non-zero for true.
283 int mv_i_nn_eq(mv_t* pval1, mv_t* pval2);
284 int mv_i_nn_ne(mv_t* pval1, mv_t* pval2);
285 int mv_i_nn_gt(mv_t* pval1, mv_t* pval2);
286 int mv_i_nn_ge(mv_t* pval1, mv_t* pval2);
287 int mv_i_nn_lt(mv_t* pval1, mv_t* pval2);
288 int mv_i_nn_le(mv_t* pval1, mv_t* pval2);
289
290 // For unit-test keystroke-saving:
// NOTE(review): the "copy" variants presumably compare copies of the arguments so
// that the originals are left untouched -- confirm in the implementation.
291 int mveq(mv_t* pval1, mv_t* pval2);
292 int mvne(mv_t* pval1, mv_t* pval2);
293 int mveqcopy(mv_t* pval1, mv_t* pval2);
294 int mvnecopy(mv_t* pval1, mv_t* pval2);
295
296 // ----------------------------------------------------------------
297 // For qsort of numeric mlrvals.
// The signature matches the comparison callback expected by qsort().
// NOTE(review): pva and pvb presumably each point to an mv_t.
298 int mv_nn_comparator(const void* pva, const void* pvb);
299
300 // For qsort of arbitrary mlrvals. Sort rules:
301 // * Across types:
302 // NUMERICS < BOOL < STRINGS < ERROR < ABSENT
303 // * Within types:
304 // o numeric compares on numbers
305 // o false < true
306 // o string compares on strings
307 // o error == error (this is a singleton type)
308 // o absent == absent (this is a singleton type)
309 int mv_xx_comparator(const void* pva, const void* pvb);
310
// NOTE(review): presumably a binary search over a sorted array of `size` numeric
// mlrvals that returns the index at which *pvalue should be inserted. The expected
// sort direction and the handling of ties are not visible here.
311 int mlr_bsearch_mv_n_for_insert(mv_t* array, int size, mv_t* pvalue);
312
313 #endif // MVFUNCS_H
314