1 // ================================================================
2 // Array-only (open addressing) multi-level hash map, with linear probing for collisions.
3 // All keys, and terminal-level values, are mlrvals. All data passed into the put method
4 // are copied; no pointers in this data structure reference anything external.
5 //
6 // Notes:
7 // * null key is not supported.
8 // * null value is not supported.
9 //
10 // See also:
11 // * http://en.wikipedia.org/wiki/Hash_table
12 // * http://docs.oracle.com/javase/6/docs/api/java/util/Map.html
13 // ================================================================
14
15 #ifndef MLHMMV_H
16 #define MLHMMV_H
17
18 #include "lib/mlrval.h"
19 #include "containers/sllmv.h"
20 #include "containers/sllv.h"
21 #include "containers/lrec.h"
22
23 #define MLHMMV_ERROR_NONE 0x0000
24 #define MLHMMV_ERROR_KEYLIST_TOO_DEEP 0xdeef
25 #define MLHMMV_ERROR_KEYLIST_TOO_SHALLOW 0x58a1
26
27 // This is made visible here in the API so the unit-tester can be sure to exercise the resize logic.
28 #define MLHMMV_INITIAL_ARRAY_LENGTH 16
29
30 // ----------------------------------------------------------------
31 void mlhmmv_print_terminal(mv_t* pmv, int quote_keys_always, int quote_values_always,
32 FILE* ostream);
33
34 // ----------------------------------------------------------------
35 struct _mlhmmv_level_t; // forward reference
36
37 // The 'x' is for extended: this can hold a scalar or a map.
38 typedef struct _mlhmmv_xvalue_t {
39 struct _mlhmmv_level_t* pnext_level;
40 mv_t terminal_mlrval;
41 char is_terminal;
42 } mlhmmv_xvalue_t;
43
44 void mlhmmv_xvalue_reset(mlhmmv_xvalue_t* pxvalue);
45 mlhmmv_xvalue_t mlhmmv_xvalue_alloc_empty_map();
46 mlhmmv_xvalue_t mlhmmv_xvalue_copy(mlhmmv_xvalue_t* pxvalue);
47 void mlhmmv_xvalue_free(mlhmmv_xvalue_t* pxvalue);
48
49 char* mlhmmv_xvalue_describe_type_simple(mlhmmv_xvalue_t* pxvalue);
50
mlhmmv_xvalue_is_absent_and_nonterminal(mlhmmv_xvalue_t * pxvalue)51 static inline int mlhmmv_xvalue_is_absent_and_nonterminal(mlhmmv_xvalue_t* pxvalue) {
52 return (pxvalue->is_terminal && mv_is_absent(&pxvalue->terminal_mlrval));
53 }
54
mlhmmv_xvalue_is_present_and_nonterminal(mlhmmv_xvalue_t * pxvalue)55 static inline int mlhmmv_xvalue_is_present_and_nonterminal(mlhmmv_xvalue_t* pxvalue) {
56 return (pxvalue->is_terminal && mv_is_present(&pxvalue->terminal_mlrval));
57 }
58
59 // Used by for-loops over map-valued local variables
60 sllv_t* mlhmmv_xvalue_copy_keys_indexed (mlhmmv_xvalue_t* pxvalue, sllmv_t* pmvkeys);
61 sllv_t* mlhmmv_xvalue_copy_keys_nonindexed(mlhmmv_xvalue_t* pxvalue);
62
63 void mlhmmv_xvalues_to_lrecs_lashed(
64 mlhmmv_xvalue_t** ptop_values,
65 int num_submaps,
66 mv_t* pbasenames,
67 sllmv_t* pnames,
68 sllv_t* poutrecs,
69 int do_full_prefixing,
70 char* flatten_separator);
71
72 // ----------------------------------------------------------------
73 typedef struct _mlhmmv_level_entry_t {
74 int ideal_index;
75 mv_t level_key;
76 mlhmmv_xvalue_t level_xvalue; // terminal mlrval, or another hashmap
77 struct _mlhmmv_level_entry_t *pprev;
78 struct _mlhmmv_level_entry_t *pnext;
79 } mlhmmv_level_entry_t;
80
81 typedef unsigned char mlhmmv_level_entry_state_t;
82
83 // Store a mlrval into the mlhmmv_xvalue without copying, implicitly transferring
84 // ownership of the mlrval's free_flags. This means the mlrval will be freed
85 // when the mlhmmv_xvalue is freed, so the caller should make a copy first if
86 // necessary.
87 //
88 // This is a hot path for non-map local-variable assignments.
mlhmmv_xvalue_wrap_terminal(mv_t val)89 static inline mlhmmv_xvalue_t mlhmmv_xvalue_wrap_terminal(mv_t val) {
90 return (mlhmmv_xvalue_t) {.is_terminal = TRUE, .terminal_mlrval = val, .pnext_level = NULL};
91 }
92
93 // ----------------------------------------------------------------
94 typedef struct _mlhmmv_level_t {
95 int num_occupied;
96 int num_freed;
97 int array_length;
98 mlhmmv_level_entry_t* entries;
99 mlhmmv_level_entry_state_t* states;
100 mlhmmv_level_entry_t* phead;
101 mlhmmv_level_entry_t* ptail;
102 } mlhmmv_level_t;
103
104 mlhmmv_level_t* mlhmmv_level_alloc();
105 void mlhmmv_level_free(mlhmmv_level_t* plevel);
106
107 void mlhmmv_level_clear(mlhmmv_level_t* plevel);
108 void mlhmmv_level_remove(mlhmmv_level_t* plevel, sllmve_t* prestkeys);
109 int mlhmmv_level_has_key(mlhmmv_level_t* plevel, mv_t* plevel_key);
110
111 mv_t* mlhmmv_level_look_up_and_ref_terminal(
112 mlhmmv_level_t* plevel,
113 sllmv_t* pmvkeys,
114 int* perror);
115
116 mlhmmv_xvalue_t* mlhmmv_level_look_up_and_ref_xvalue(
117 mlhmmv_level_t* plevel,
118 sllmv_t* pmvkeys,
119 int* perror);
120
121 mlhmmv_level_t* mlhmmv_level_put_empty_map(
122 mlhmmv_level_t* plevel,
123 mv_t* pkey);
124
125 void mlhmmv_level_put_xvalue(
126 mlhmmv_level_t* plevel,
127 sllmve_t* prest_keys,
128 mlhmmv_xvalue_t* pvalue);
129
130 void mlhmmv_level_put_xvalue_singly_keyed(
131 mlhmmv_level_t* plevel,
132 mv_t* pkey,
133 mlhmmv_xvalue_t* pvalue);
134
135 void mlhmmv_level_put_terminal(
136 mlhmmv_level_t* plevel,
137 sllmve_t* prest_keys,
138 mv_t* pterminal_value);
139
140 void mlhmmv_level_put_terminal_singly_keyed(
141 mlhmmv_level_t* plevel,
142 mv_t* pkey,
143 mv_t* pterminal_value);
144
145 void mlhmmv_level_to_lrecs(
146 mlhmmv_level_t* plevel,
147 sllmv_t* pkeys,
148 sllmv_t* pnames,
149 sllv_t* poutrecs,
150 int do_full_prefixing,
151 char* flatten_separator);
152
153 void mlhmmv_level_print_stacked(
154 mlhmmv_level_t* plevel,
155 int depth,
156 int do_final_comma,
157 int quote_keys_always,
158 int quote_values_always,
159 char* line_indent,
160 char* line_term,
161 FILE* ostream);
162
163 // ----------------------------------------------------------------
164 typedef struct _mlhmmv_root_t {
165 mlhmmv_xvalue_t root_xvalue;
166 } mlhmmv_root_t;
167
168 mlhmmv_root_t* mlhmmv_root_alloc();
169
170 void mlhmmv_root_free(mlhmmv_root_t* pmap);
171
172 void mlhmmv_root_clear(mlhmmv_root_t* pmap);
173
174 // If the return value is non-null, error will be MLHMMV_ERROR_NONE. If the
175 // return value is null, the error will be MLHMMV_ERROR_KEYLIST_TOO_DEEP or
176 // MLHMMV_ERROR_KEYLIST_TOO_SHALLOW, or MLHMMV_ERROR_NONE if the keylist matches
177 // map depth but the entry is not found.
178 //
179 // Note: this returns a pointer to the map's data, not to a copy.
180 // The caller shouldn't free it, or modify it.
181 mv_t* mlhmmv_root_look_up_and_ref_terminal(mlhmmv_root_t* pmap, sllmv_t* pmvkeys, int* perror);
182
183 // These are an optimization for assignment from full srec, e.g. '@records[$key1][$key2] = $*'.
184 // Using mlhmmv_root_look_up_or_create_then_ref_level, the CST logic can get or create the @records[$key1][$key2]
185 // level of the mlhmmv, then copy values there.
186 mlhmmv_level_t* mlhmmv_root_look_up_or_create_then_ref_level(mlhmmv_root_t* pmap, sllmv_t* pmvkeys);
187
188 void mlhmmv_root_put_terminal(mlhmmv_root_t* pmap, sllmv_t* pmvkeys, mv_t* pterminal_value);
189
190 // For for-loop-over-oosvar, wherein we need to copy the submap before iterating over it
191 // (since the iteration may modify it). If the keys don't index a submap, then the return
192 // value has is_terminal = TRUE and pnext_level = NULL.
193 mlhmmv_xvalue_t mlhmmv_root_copy_xvalue(mlhmmv_root_t* pmap, sllmv_t* pmvkeys);
194
195 // Used by for-loops over oosvars. Return value is an array of ephemeral mlrvals.
196 sllv_t* mlhmmv_root_copy_keys_from_submap(mlhmmv_root_t* pmap, sllmv_t* pmvkeys);
197
198 // Unset value/submap from a specified level onward, also unsetting any maps which become empty as a result.
199 // Examples:
200 // {
201 // "a" : { "x" : 1, "y" : 2 },
202 // "b" : { "x" : 3, "y" : 4 },
203 // }
204 // with pmvkeys = ["a"] leaves
205 // {
206 // "b" : { "x" : 3, "y" : 4 },
207 // }
208 // but with pmvkeys = ["a", "y"] leaves
209 // {
210 // "a" : { "x" : 1 },
211 // "b" : { "x" : 3, "y" : 4 },
212 // }
213 // and with pmvkeys = [] leaves
214 // {
215 // }
216 // Now if ["a","x"] is removed from
217 // {
218 // "a" : { "x" : 1 },
219 // "b" : { "x" : 3, "y" : 4 },
220 // }
221 // then
222 // {
223 // "b" : { "x" : 3, "y" : 4 },
224 // }
225 // is left: unsetting "a":"x" leaves the map at "a" so this is unset as well.
226 void mlhmmv_root_remove(mlhmmv_root_t* pmap, sllmv_t* pmvkeys);
227
228 // For 'emit' and 'emitp' in the DSL. These allocate lrecs, appended to the poutrecs list.
229 // * pmap is the base-level oosvar multi-level hashmap.
230 // * pkeys specify the level in the mlhmmv at which to produce data.
231 // * pnames is used to pull subsequent-level keys out into separate fields.
232 // * In case pnames isn't long enough to reach a terminal mlrval level in the mlhmmv,
233 // do_full_prefixing specifies whether to concatenate nested mlhmmv keys into single lrec keys.
234 //
235 // Examples:
236
237 // * pkeys reaches a terminal level:
238 //
239 // $ mlr --opprint put -q '@sum += $x; end { emit @sum }' ../data/small
240 // sum
241 // 4.536294
242
243 // * pkeys reaches terminal levels:
244 //
245 // $ mlr --opprint put -q '@sum[$a][$b] += $x; end { emit @sum, "a", "b" }' ../data/small
246 // a b sum
247 // pan pan 0.346790
248 // pan wye 0.502626
249 // eks pan 0.758680
250 // eks wye 0.381399
251 // eks zee 0.611784
252 // wye wye 0.204603
253 // wye pan 0.573289
254 // zee pan 0.527126
255 // zee wye 0.598554
256 // hat wye 0.031442
257
258 // * pkeys reaches non-terminal levels: non-prefixed:
259 //
260 // $ mlr --opprint put -q '@sum[$a][$b] += $x; end { emit @sum, "a" }' ../data/small
261 // a pan wye
262 // pan 0.346790 0.502626
263 //
264 // a pan wye zee
265 // eks 0.758680 0.381399 0.611784
266 //
267 // a wye pan
268 // wye 0.204603 0.573289
269 //
270 // a pan wye
271 // zee 0.527126 0.598554
272 //
273 // a wye
274 // hat 0.031442
275
276 // * pkeys reaches non-terminal levels: prefixed:
277 //
278 // $ mlr --opprint put -q '@sum[$a][$b] += $x; end { emitp @sum, "a" }' ../data/small
279 // a sum:pan sum:wye
280 // pan 0.346790 0.502626
281 //
282 // a sum:pan sum:wye sum:zee
283 // eks 0.758680 0.381399 0.611784
284 //
285 // a sum:wye sum:pan
286 // wye 0.204603 0.573289
287 //
288 // a sum:pan sum:wye
289 // zee 0.527126 0.598554
290 //
291 // a sum:wye
292 // hat 0.031442
293
294 // For 'emit all' and 'emitp all' in the DSL
295 void mlhmmv_root_all_to_lrecs(mlhmmv_root_t* pmap, sllmv_t* pnames, sllv_t* poutrecs,
296 int do_full_prefixing, char* flatten_separator);
297
298 // For 'emit' and 'emitp' in the DSL
299 void mlhmmv_root_partial_to_lrecs(mlhmmv_root_t* pmap, sllmv_t* pkeys, sllmv_t* pnames, sllv_t* poutrecs,
300 int do_full_prefixing, char* flatten_separator);
301
302 // For 'dump' in the DSL; also used by the lrec-to-JSON writer.
303 void mlhmmv_root_print_json_stacked(mlhmmv_root_t* pmap,
304 int quote_keys_always, int quote_values_always,
305 char* line_indent, char* line_term, FILE* ostream);
306 void mlhmmv_root_print_json_single_lines(mlhmmv_root_t* pmap, int quote_keys_always,
307 int quote_values_always, char* line_term, FILE* ostream);
308
309 // Used for emit of localvars. Puts the xvalue in a single-key-value-pair map
310 // keyed by the specified name. The xvalue is referenced, not copied.
311 mlhmmv_root_t* mlhmmv_wrap_name_and_xvalue(mv_t* pname, mlhmmv_xvalue_t* pxval);
312
313 // Used for takedown of the temporary map returned by mlhmmv_wrap_name_and_xvalue. Since the xvalue there
314 // is referenced, not copied, mlhmmv_xvalue_free would prematurely free the xvalue. This method releases
315 // the xvalue so that the remaining, map-internal structures can be freed correctly.
316 void mlhmmv_unwrap_name_and_xvalue(mlhmmv_root_t* pmap);
317
318 #endif // MLHMMV_H
319