1 #ifndef MLRVAL_H
2 #define MLRVAL_H
3 
4 #include <math.h>
5 #include <string.h>
6 #include <ctype.h>
7 #include "../lib/mlrutil.h"
8 #include "../lib/mlrregex.h"
9 #include "../lib/free_flags.h"
10 
11 // ================================================================
12 // MT for Miller type -- highly abbreviated here since these are
13 // spelled out a lot in rval_evaluators.c.
14 //
15 // ================================================================
16 // NOTE: mlrval functions invalidate their arguments. In particular, dynamically
17 // allocated strings input to these functions will either be freed, or will
18 // have their ownership transferred to the output mlrval.
19 //
20 // This is because the primary purpose of mlrvals is for evaluation of abstract
21 // syntax trees defined by the DSL for put and filter. Example AST:
22 //
23 //   $ mlr put -v '$z = $x . $y . "sum"' /dev/null
24 //   = (operator):
25 //       z (field_name).
26 //       . (operator):
27 //           . (operator):
28 //               x (field_name).
29 //               y (field_name).
30 //           sum (literal).
31 //
32 // * Given an lrec with fields named "x" and "y", there will be pointers to x
33 //   and y's field values from the input-data stream -- either to mmapped data
34 //   from a file, or pointers into dynamically allocated lines from stdio.
35 //
36 // * The from-field-name mlrvals for x and y values will point into lrec memory
37 //   but will have their own free-flags unset (since freeing of lrec memory is
38 //   the job of the lrec instance).
39 //
40 // * The dot operator will do any necessary freeing of the x and y mlrval
41 //   strings -- none in this case since they are direct references to field
42 //   values. The output of $x . $y, by contrast, will be dynamically
43 //   allocated.
44 //
45 // * The "sum" literal string is a pointer ultimately into argv[].
46 //   The from-literal mlrval will not have its free-flag set.
47 //
// * The concatenation of $x . $y and "sum" will be dynamically allocated.
49 //   The $x . $y input string will be freed; the "sum" string won't be
50 //   since it wasn't owned by the from-literal mlrval.
51 //
52 // * The result of this outer concatenation will be stored in the $z field of
53 //   the current record, with ownership for the dynamically allocated string
54 //   transferred to the lrec instance.
55 //
56 // There is also some use of mlrvals in mixed float/int handling inside various
57 // mappers (e.g. stats1). There the use is much simpler: accumulation of
58 // numeric quantities, ultimately formatted as a string for output.
59 //
60 // ================================================================
61 //
62 // Many functions here use the naming convention x_yz_name or x_:
63 //
64 // * The first letter indicates return type.
65 //
66 // * The letters between the underscores indicate argument types, and their count indicates arity.
67 //
68 // * The following abbreviations apply:
69 //   o a: MT_ABSENT
70 //   o v: MT_EMPTY (v for void; e is for error)
71 //   o e: MT_ERROR
72 //   o b: MT_BOOLEAN
73 //   o f: MT_FLOAT
74 //   o i: MT_INT
75 //   o s: MT_STRING
76 //   o r: regular expression
77 //   o n: Numeric, i.e. MT_INT or MT_FLOAT
78 //   o x: any of the above.
79 //   o z: used for zero-argument functions, e.g. f_z_urand takes no arguments and returns MT_FLOAT.
80 //
81 // * If a function takes arguments of type x then that indicates it has a disposition vector/matrix
82 //   (or switch statements, or if-else statements) allowing it to handle various types.
83 //
84 // * If it takes arguments of type n then that indicates it is up to the caller to pass only numeric types.
85 //
86 // * If it takes arguments of type s then that indicates it is up to the caller to pass only strings.
87 //
88 // ================================================================
89 
90 
91 // Among other things, these defines are used in mlrval.c to index disposition matrices.
92 // So, if the numeric values are changed, all the matrices must be as well.
93 
94 // Two kinds of null: absent (key not present in a record) and void (key present with empty value).
95 // Note void is an acceptable string (empty string) but not an acceptable number.
96 // Void-valued mlrvals have u.strv = "".
// Type codes stored in mv_t.type. The numeric ordering is significant: the
// *_OUT_FOR_* macros in this header compare type codes against MT_EMPTY with
// < and <=, relying on MT_ERROR and MT_ABSENT sorting below MT_EMPTY.
#define MT_ERROR    0 // E.g. error encountered in one eval & it propagates up the AST.
#define MT_ABSENT   1 // No such key, e.g. $z in 'x=,y=2'
#define MT_EMPTY    2 // Empty value, e.g. $x in 'x=,y=2'
#define MT_STRING   3 // String value; payload in u.strv
#define MT_INT      4 // Integer value; payload in u.intv
#define MT_FLOAT    5 // Floating-point value; payload in u.fltv
#define MT_BOOLEAN  6 // Boolean value; payload in u.boolv
#define MT_DIM      7 // Count of the type codes above. NOTE(review): presumably the dimension of the disposition matrices in mlrval.c -- confirm.
105 
// Tagged value ("mlrval"): 'type' holds one of the MT_* codes and selects
// which member of the union 'u' is meaningful.
typedef struct _mv_t {
	union {
		char*      strv;  // MT_STRING and MT_EMPTY
		long long  intv;  // MT_INT, and == 0 for MT_ABSENT and MT_ERROR
		double     fltv;  // MT_FLOAT
		int        boolv; // MT_BOOLEAN
	} u;
	unsigned char type; // One of the MT_* type codes.
	char free_flags; // NO_FREE, or FREE_ENTRY_VALUE when this mlrval owns u.strv (see mv_free/mv_reset).
} mv_t;
116 
117 // ----------------------------------------------------------------
// Early-exit guard for string-handling functions: makes the ENCLOSING
// function return val as-is when val's type code sorts below MT_EMPTY, i.e.
// when it is MT_ERROR or MT_ABSENT. MT_EMPTY is let through, since the empty
// string is an acceptable string. Note that val is expanded more than once.
#define NULL_OR_ERROR_OUT_FOR_STRINGS(val) { \
	if (MT_EMPTY > (val).type) { \
		return val; \
	} \
}
122 
// Early-exit guard for numeric functions: makes the ENCLOSING function return
// val as-is when val is MT_ERROR, MT_ABSENT, or MT_EMPTY (type codes up to and
// including MT_EMPTY). Note that val is expanded more than once.
#define NULL_OR_ERROR_OUT_FOR_NUMBERS(val) { \
	if (!((val).type > MT_EMPTY)) { \
		return val; \
	} \
}
127 
// Early-exit guard for numeric functions which must still process absent
// inputs: makes the ENCLOSING function return val as-is when val is MT_EMPTY
// or MT_ERROR. MT_ABSENT is let through. Note that val is expanded more than
// once.
#define EMPTY_OR_ERROR_OUT_FOR_NUMBERS(val) { \
	if ((val).type == MT_EMPTY || (val).type == MT_ERROR) { \
		return val; \
	} \
}
132 
133 // ----------------------------------------------------------------
134 // CONSTRUCTORS
135 
mv_from_float(double d)136 static inline mv_t mv_from_float(double d) {
137 	return (mv_t) {.type = MT_FLOAT, .free_flags = NO_FREE, .u.fltv = d};
138 }
139 
mv_from_int(long long i)140 static inline mv_t mv_from_int(long long i) {
141 	return (mv_t) {.type = MT_INT, .free_flags = NO_FREE, .u.intv = i};
142 }
143 
mv_from_bool(int b)144 static inline mv_t mv_from_bool(int b) {
145 	return (mv_t) {.type = MT_BOOLEAN, .free_flags = NO_FREE, .u.boolv = b};
146 }
mv_from_true()147 static inline mv_t mv_from_true() {
148 	return (mv_t) {.type = MT_BOOLEAN, .free_flags = NO_FREE, .u.boolv = TRUE};
149 }
mv_from_false()150 static inline mv_t mv_from_false() {
151 	return (mv_t) {.type = MT_BOOLEAN, .free_flags = NO_FREE, .u.boolv = FALSE};
152 }
153 
mv_from_string_with_free(char * s)154 static inline mv_t mv_from_string_with_free(char* s) {
155 	return (mv_t) {.type = MT_STRING, .free_flags = FREE_ENTRY_VALUE, .u.strv = s};
156 }
mv_from_string_no_free(char * s)157 static inline mv_t mv_from_string_no_free(char* s) {
158 	return (mv_t) {.type = MT_STRING, .free_flags = NO_FREE, .u.strv = s};
159 }
mv_from_string(char * s,char free_flags)160 static inline mv_t mv_from_string(char* s, char free_flags) {
161 	return (mv_t) {.type = MT_STRING, .free_flags = free_flags, .u.strv = s};
162 }
163 
mv_absent()164 static inline mv_t mv_absent() { return (mv_t) {.type = MT_ABSENT, .free_flags = NO_FREE, .u.intv = 0};  }
mv_empty()165 static inline mv_t mv_empty()  { return (mv_t) {.type = MT_EMPTY,  .free_flags = NO_FREE, .u.strv = ""}; }
mv_error()166 static inline mv_t mv_error()  { return (mv_t) {.type = MT_ERROR,  .free_flags = NO_FREE, .u.intv = 0};  }
167 
// Returns a copy of *pval. For MT_STRING the character data is duplicated and
// the copy owns its duplicate (FREE_ENTRY_VALUE). All other types, including
// MT_EMPTY, are copied member-for-member, so any u.strv pointer and the
// free-flags are shared verbatim with the original.
static inline mv_t mv_copy(mv_t* pval) {
	if (pval->type != MT_STRING)
		return *pval;

	char* duplicate = mlr_strdup_or_die(pval->u.strv);
	return mv_from_string_with_free(duplicate);
}
175 
// Heap-allocates a new mv_t and fills it with mv_copy(pold). The caller owns
// the returned pointer (and, via mv_free, any string the copy owns).
static inline mv_t* mv_alloc_copy(mv_t* pold) {
	mv_t* pcopy = mlr_malloc_or_die(sizeof *pcopy);
	*pcopy = mv_copy(pold);
	return pcopy;
}
181 
182 // ----------------------------------------------------------------
183 // DESTRUCTOR
184 
// Releases the string held by *pval, if it is an MT_STRING which owns its
// storage (FREE_ENTRY_VALUE set), and nulls the pointer. In all cases the
// type is then set to MT_ABSENT. The free-flags and the mv_t itself are left
// untouched.
static inline void mv_free(mv_t* pval) {
	int owns_string = (pval->type == MT_STRING) && (pval->free_flags & FREE_ENTRY_VALUE);
	if (owns_string) {
		free(pval->u.strv);
		pval->u.strv = NULL;
	}
	pval->type = MT_ABSENT;
}
192 
193 // ----------------------------------------------------------------
194 // For stack-clear: set to ABSENT, freeing previous value if necessary
mv_reset(mv_t * pval)195 static inline void mv_reset(mv_t* pval) {
196 	if ((pval->type) == MT_STRING && (pval->free_flags & FREE_ENTRY_VALUE)) {
197 		free(pval->u.strv);
198 	}
199 	*pval = mv_absent();
200 }
201 
202 // ----------------------------------------------------------------
203 // TYPE-TESTERS
204 
// Returns 1 if *pval is MT_STRING or MT_EMPTY, else 0.
static inline int mv_is_string_or_empty(mv_t* pval) {
	switch (pval->type) {
	case MT_STRING:
	case MT_EMPTY:
		return 1;
	default:
		return 0;
	}
}
// Returns 1 if *pval is MT_INT or MT_FLOAT, else 0.
static inline int mv_is_numeric(mv_t* pval) {
	switch (pval->type) {
	case MT_INT:
	case MT_FLOAT:
		return 1;
	default:
		return 0;
	}
}
// Returns 1 if *pval is MT_INT, else 0.
static inline int mv_is_int(mv_t* pval) {
	return (pval->type == MT_INT) ? 1 : 0;
}
// Returns 1 if *pval is MT_FLOAT, else 0.
static inline int mv_is_float(mv_t* pval) {
	return (pval->type == MT_FLOAT) ? 1 : 0;
}
// Returns 1 if *pval is MT_BOOLEAN, else 0.
static inline int mv_is_boolean(mv_t* pval) {
	return (pval->type == MT_BOOLEAN) ? 1 : 0;
}
// Returns 1 if *pval is MT_STRING or MT_EMPTY, else 0. The empty value
// counts as a string here (void is an acceptable string), so this gives the
// same answer as mv_is_string_or_empty.
static inline int mv_is_string(mv_t* pval) {
	switch (pval->type) {
	case MT_STRING:
	case MT_EMPTY:
		return 1;
	default:
		return 0;
	}
}
// Returns 1 if *pval is MT_ERROR, else 0.
static inline int mv_is_error(mv_t* pval) {
	return (pval->type == MT_ERROR) ? 1 : 0;
}
// Returns 1 if *pval is MT_ABSENT (no such key), else 0.
static inline int mv_is_absent(mv_t* pval) {
	return (pval->type == MT_ABSENT) ? 1 : 0;
}
// Returns 1 if *pval is anything other than MT_ABSENT, else 0. Note that
// MT_ERROR and MT_EMPTY both count as present.
static inline int mv_is_present(mv_t* pval) {
	return !(pval->type == MT_ABSENT);
}
// Returns 1 if *pval is empty-valued: either typed MT_EMPTY, or typed
// MT_STRING with a zero-length string. Returns 0 otherwise. For MT_STRING,
// u.strv is dereferenced and so must be non-null.
static inline int mv_is_empty(mv_t* pval) {
	if (pval->type == MT_EMPTY)
		return 1;
	if (pval->type != MT_STRING)
		return 0;
	return pval->u.strv[0] == '\0';
}
// Returns 1 if *pval is not empty-valued, else 0. This is the exact negation
// of mv_is_empty: a value is empty if it is typed MT_EMPTY, or if it is typed
// MT_STRING with a zero-length string. (Previously only the MT_EMPTY type was
// checked, so a zero-length MT_STRING was reported as both empty and
// not-empty.) Absent and error values count as not empty. For MT_STRING,
// u.strv is dereferenced and so must be non-null.
static inline int mv_is_not_empty(mv_t* pval) {
	if (pval->type == MT_EMPTY)
		return 0;
	if (pval->type == MT_STRING && *pval->u.strv == 0)
		return 0;
	return 1;
}
// Returns 1 if *pval is null in either sense -- absent (mv_is_absent) or
// empty-valued (mv_is_empty) -- else 0.
static inline int mv_is_null(mv_t* pval) {
	if (mv_is_absent(pval))
		return 1;
	return mv_is_empty(pval) ? 1 : 0;
}
// Returns 1 if *pval is null (absent or empty-valued, per mv_is_null) or is
// MT_ERROR, else 0. (The error clause previously tested MT_EMPTY, which
// mv_is_null already covers, so error values were never detected.)
static inline int mv_is_null_or_error(mv_t* pval) {
	return mv_is_null(pval) || pval->type == MT_ERROR;
}
// Returns 1 if *pval is neither absent nor empty-valued (the negation of
// mv_is_null), else 0. Note that MT_ERROR counts as non-null.
static inline int mv_is_non_null(mv_t* pval) {
	return mv_is_null(pval) ? 0 : 1;
}
247 
248 // ----------------------------------------------------------------
249 // AUXILIARY METHODS
250 
// Take an MT_* type code and return a string.
// NOTE(review): implemented outside this header; presumably these return a
// printable name for the type, and the difference between the two variants
// is not visible here -- confirm against the definitions.
char* mt_describe_type(int type);
char* mt_describe_type_simple(int type);

// Allocates memory which the caller must free; does not modify the mlrval.
// Returns no reference to the mlrval's data.  Suitable for getting data out of
// a mlrval which might be about to be freed.
char* mv_alloc_format_val(mv_t* pval);
char* mv_alloc_format_val_quoting_strings(mv_t* pval);

// Returns a reference to the mlrval's data if the mlrval is MT_STRING.
// Does not modify the mlrval. Suitable only for read-only string-formatting
// of the mlrval while it still exists and hasn't been freed yet.
char* mv_maybe_alloc_format_val(mv_t* pval, char* pfree_flags);

// If the mlrval is MT_STRING, returns that and invalidates the argument.
// This is suitable for baton-pass-out (end of evaluation chain).
char* mv_format_val(mv_t* pval, char* pfree_flags);

// Output string includes type and value information (e.g. for debug).
// The caller must free the return value.
char* mv_describe_val(mv_t val);

// In-place type converters operating on *pval.
// NOTE(review): implemented outside this header. Judging by the names only,
// the "strict" and "nullable" variants presumably differ in whether null
// (absent/empty) inputs are accepted -- confirm against the definitions.
void mv_set_boolean_strict(mv_t* pval);
void mv_set_float_strict(mv_t* pval);
void mv_set_float_nullable(mv_t* pval);
void mv_set_int_nullable(mv_t* pval);

// int or float:
void mv_set_number_nullable(mv_t* pval);
// Build a numeric mlrval from text.
// NOTE(review): failure behavior is not visible here; presumably the
// "_or_die" variant terminates the process on unparseable input -- confirm.
mv_t mv_scan_number_nullable(char* string);
mv_t mv_scan_number_or_die(char* string);
282 
// Type-inferencing for the first three functions below, respectively:
//   "x" => "x", "3" => "3"
//   "x" => "x", "3" => 3.0
//   "x" => "x", "3" => 3
// In common to all three:
// * Null string -> mv_absent
// * Empty string -> mv_empty
// * Non-numeric -> string-valued mlrval with storage pointing
//   to the char* (no copy is done).
// The fourth function is the copying counterpart of the third: the string is
// duplicated when the result is MT_STRING.
mv_t mv_ref_type_infer_string(char* string);
mv_t mv_ref_type_infer_string_or_float(char* string);
mv_t mv_ref_type_infer_string_or_float_or_int(char* string);
mv_t mv_copy_type_infer_string_or_float_or_int(char* string); // strdups if retval is MT_STRING
297 
298 #endif // MLRVAL_H
299